{"cells":[{"cell_type":"markdown","source":["# 1.Preparation"],"metadata":{"id":"Ll08ZcPICXjy"}},{"cell_type":"markdown","source":["Connect to Google drive"],"metadata":{"id":"ZFv572DsD1HL"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7ZQ84WjGITux","executionInfo":{"status":"ok","timestamp":1730838049326,"user_tz":300,"elapsed":29355,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"}},"outputId":"c6acc52d-30c9-4890-d159-f694f8f1bdad"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"markdown","source":["Change this to your own address."],"metadata":{"id":"3gIkEbflCrzr"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2343,"status":"ok","timestamp":1730838068494,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"Y2bAkVcLJnL1","outputId":"3177ef79-a297-4a1b-e80a-5657323774d6"},"outputs":[{"output_type":"stream","name":"stdout","text":[" Add_Clustered_news_0.6\t\t\t\t     HON_2023-10-30_2024-09-30.json\n","'Benchmark with Llama3 8b Data.ipynb'\t\t     KO_2023-10-30_2024-09-30.json\n"," Clustered_news_0.6\t\t\t\t     MCD_2023-10-30_2024-09-30.json\n"," config.json\t\t\t\t\t    'News repetition reduction.ipynb'\n"," dataset_dict.json\t\t\t\t     __pycache__\n"," DIS_2023-10-30_2024-09-30.json\t\t\t     test\n"," finetuned_models\t\t\t\t     test_demo.ipynb\n"," fingpt-forecaster\t\t\t\t     train\n"," fingpt-forecaster-1029\t\t\t\t     train_lora.py\n"," fingpt-forecaster-1102\t\t\t\t     train.sh\n"," fingpt-forecaster-1105\t\t\t\t     Untitled\n"," fingpt-forecaster-dow-30-20230930-20240930-1-4-06   utils.py\n"," fingpt-forecaster-dow-30-20231030-20240930-1-4-06   wandb\n"," HD_2023-10-30_2024-09-30.json\n"]}],"source":["!ls \"/content/drive/MyDrive/Your own address\""]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1027,"status":"ok","timestamp":1730838072202,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"hXdXz4gQIW-s","outputId":"caf67953-4185-4735-e4ec-3e7f1335e947"},"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data\n"]}],"source":["%cd \"/content/drive/MyDrive/Your own address\""]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":30693,"status":"ok","timestamp":1730838104298,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"gaN7bj1vKJoJ","outputId":"3efec6e9-f0ae-48ee-cd2b-a3a7ff314436"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.5.0+cu121)\n","Collecting datasets\n","  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n","Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.13.2)\n","Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (0.18.5)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.24.7)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n","Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.6)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.10.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n","Collecting dill<0.3.9,>=0.3.0 (from datasets)\n","  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n","Collecting xxhash (from datasets)\n","  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n","Collecting multiprocess<0.70.17 (from datasets)\n","  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n","Collecting fsspec (from torch)\n","  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n","Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.34.2)\n","Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb) (8.1.7)\n","Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (0.4.0)\n","Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.1.43)\n","Requirement already satisfied: platformdirs in /usr/local/lib/python3.10/dist-packages (from wandb) (4.3.6)\n","Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.20.3)\n","Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (2.17.0)\n","Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb) (1.3.3)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (75.1.0)\n","Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n","Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.17.0)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.11)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.2.3)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.8.30)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n","Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.1)\n","Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n","Downloading datasets-3.1.0-py3-none-any.whl (480 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: xxhash, fsspec, dill, multiprocess, datasets\n","  Attempting uninstall: fsspec\n","    Found existing installation: fsspec 2024.10.0\n","    Uninstalling fsspec-2024.10.0:\n","      Successfully uninstalled fsspec-2024.10.0\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n","gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n","\u001b[0mSuccessfully installed datasets-3.1.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 xxhash-3.5.0\n","Collecting deepspeed\n","  Downloading deepspeed-0.15.3.tar.gz (1.4 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m36.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting hjson (from deepspeed)\n","  Downloading hjson-3.1.0-py3-none-any.whl.metadata (2.6 kB)\n","Requirement already satisfied: msgpack in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.1.0)\n","Collecting ninja (from deepspeed)\n","  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from deepspeed) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (24.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from deepspeed) (5.9.5)\n","Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed) (9.0.0)\n","Requirement already satisfied: pydantic>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.9.2)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from deepspeed) (2.5.0+cu121)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from deepspeed) (4.66.6)\n","Collecting nvidia-ml-py (from deepspeed)\n","  Downloading nvidia_ml_py-12.560.30-py3-none-any.whl.metadata (8.6 kB)\n","Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (2.23.4)\n","Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0.0->deepspeed) (4.12.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.16.1)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (2024.9.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch->deepspeed) (1.3.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->deepspeed) (3.0.2)\n","Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m27.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading nvidia_ml_py-12.560.30-py3-none-any.whl (40 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hBuilding wheels for collected packages: deepspeed\n","  Building wheel for deepspeed (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for deepspeed: filename=deepspeed-0.15.3-py3-none-any.whl size=1526205 sha256=bfed5756cc35d7fda60645355b0daceb7e6256a139a3dad4bdb930a2ddc17800\n","  Stored in directory: /root/.cache/pip/wheels/b3/c2/9f/37a2c813b8d64d7908793319cfdfa4f852754e177f20f0b858\n","Successfully built deepspeed\n","Installing collected packages: nvidia-ml-py, ninja, hjson, deepspeed\n","Successfully installed deepspeed-0.15.3 hjson-3.1.0 ninja-1.11.1.1 nvidia-ml-py-12.560.30\n","Collecting rouge-score\n","  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n","  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.4.0)\n","Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score) (3.8.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.26.4)\n","Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score) (1.16.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (8.1.7)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (1.4.2)\n","Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (2024.9.11)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score) (4.66.6)\n","Building wheels for collected packages: rouge-score\n","  Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=5e4083d77d9a21f4705b4242ea56de520a09047ebd9dc854fd693ec23994161a\n","  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n","Successfully built rouge-score\n","Installing collected packages: rouge-score\n","Successfully installed rouge-score-0.1.2\n"]}],"source":["!pip install transformers torch datasets peft wandb\n","!pip install deepspeed\n","!pip install rouge-score"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2145,"status":"ok","timestamp":1730838110126,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"7qq3kEezKLkG","outputId":"290ab638-69c3-4c58-8ab4-fcefb40969b9"},"outputs":[{"output_type":"stream","name":"stdout","text":["True\n","NVIDIA A100-SXM4-40GB\n"]}],"source":["import torch\n","print(torch.cuda.is_available())\n","print(torch.cuda.get_device_name(0))"]},{"cell_type":"markdown","source":["# 2.Data Loading and Formatting"],"metadata":{"id":"mxBlA3wCC2PF"}},{"cell_type":"code","source":["from datasets import load_from_disk\n","\n","\n","train_dataset = load_from_disk('/content/drive/MyDrive/Your Trainging data address/')"],"metadata":{"id":"-K4_IXjmS761","executionInfo":{"status":"error","timestamp":1735134054927,"user_tz":-480,"elapsed":1238,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"}},"colab":{"base_uri":"https://localhost:8080/","height":380},"outputId":"96aaa89b-b9cc-444d-f0b3-433bd9187a98"},"execution_count":1,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"No module named 'datasets'","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)","\u001b[0;32m<ipython-input-1-ecf56265c103>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdatasets\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mload_from_disk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mtrain_dataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_from_disk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive/MyDrive/Your Trainging data address/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'datasets'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"],"errorDetails":{"actions":[{"action":"open_url","actionText":"Open Examples","url":"/notebooks/snippets/importing_libraries.ipynb"}]}}]},{"cell_type":"code","source":["def get_unique_ticker_symbols(test_dataset):\n","\n","    ticker_symbols = set()\n","\n","\n","    for i in range(len(test_dataset)):\n","        prompt_content = test_dataset[i]['prompt']\n","\n","\n","        ticker_symbol = re.search(r\"ticker\\s([A-Z]+)\", prompt_content)\n","\n","\n","        if ticker_symbol:\n","            ticker_symbols.add(ticker_symbol.group(1))\n","\n","\n","    return list(ticker_symbols)\n","\n","\n","\n","def insert_guidance_after_intro(prompt):\n","\n","\n","    intro_marker = (\n","        \"[INST]<<SYS>>\\n\"\n","        \"You are a seasoned stock market analyst. Your task is to list the positive developments and \"\n","        \"potential concerns for companies based on relevant news and basic financials from the past weeks, \"\n","        \"then provide an analysis and prediction for the companies' stock price movement for the upcoming week.\"\n","    )\n","    guidance_start_marker = \"Based on all the information before\"\n","    guidance_end_marker = \"Following these instructions, please come up with 2-4 most important positive factors\"\n","\n","\n","    intro_pos = prompt.find(intro_marker)\n","    guidance_start_pos = prompt.find(guidance_start_marker)\n","    guidance_end_pos = prompt.find(guidance_end_marker)\n","\n","\n","    if intro_pos == -1 or guidance_start_pos == -1 or guidance_end_pos == -1:\n","        return prompt\n","\n","\n","    guidance_section = prompt[guidance_start_pos:guidance_end_pos].strip()\n","\n","\n","    new_prompt = (\n","        f\"{prompt[:intro_pos + len(intro_marker)]}\\n\\n\"\n","        f\"{guidance_section}\\n\\n\"\n","        f\"{prompt[intro_pos + len(intro_marker):guidance_start_pos]}\"\n","        f\"{prompt[guidance_end_pos:]}\"\n","    )\n","\n","    return new_prompt\n","\n","\n","def apply_to_all_prompts_in_dataset(test_dataset):\n","\n","\n","    updated_dataset = test_dataset.map(lambda x: {\"prompt\": insert_guidance_after_intro(x[\"prompt\"])})\n","\n","    return updated_dataset"],"metadata":{"id":"WbnoctfhUi6X"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train_dataset = apply_to_all_prompts_in_dataset(train_dataset)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["ec45cd4da15f4b998a1e185764f5b16d","e57edd2bb9b74676bfdcf66b12c8cc5c","f3f7a941eff946d88f082a482fec1f11","857ccdeccef04eddb1a833c0cfb8535f","74e35720b9074ccdba2e495ffd0895dd","3927b957970548da96f830871fffdbf9","b666036aa98f42d48a97b224da3fd77c","41032241335949a18ed6d8b7e167b581","e5e886e61cf549539175829a2274fdb4","f7fcd72791de411983343b0e69dbddeb","87f8a4e0e5f14751a7f19273118aaafe"]},"id":"lDruyX1GTEcg","executionInfo":{"status":"ok","timestamp":1730838131458,"user_tz":300,"elapsed":1030,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"}},"outputId":"2c366b9c-1041-4866-9b60-4a99f1422286"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/784 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ec45cd4da15f4b998a1e185764f5b16d"}},"metadata":{}}]},{"cell_type":"markdown","source":["Check the trining data"],"metadata":{"id":"CxOcBkigDcU7"}},{"cell_type":"code","source":["train_dataset['prompt'][0]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":383},"id":"ZOuxO8GqU1P7","executionInfo":{"status":"ok","timestamp":1730838197794,"user_tz":300,"elapsed":1023,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"}},"outputId":"e85e6437-f94b-41dc-ac59-8731cdf88a4a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["\"[INST]<<SYS>>\\nYou are a seasoned stock market analyst. Your task is to list the positive developments and potential concerns for companies based on relevant news and basic financials from the past weeks, then provide an analysis and prediction for the companies' stock price movement for the upcoming week. Your answer format should be as follows:\\n\\n[Positive Developments]:\\n1. ...\\n\\n[Potential Concerns]:\\n1. ...\\n\\n[Prediction & Analysis]\\nPrediction: ...\\nAnalysis: ...\\n\\n<</SYS>>\\n\\n[Company Introduction]:\\n\\nMicrosoft Corp is a leading entity in the Technology sector. Incorporated and publicly traded since 1986-03-13, the company has established its reputation as one of the key players in the market. As of today, Microsoft Corp has a market capitalization of 3051052.10 in USD, with 7433.04 shares outstanding.\\n\\nMicrosoft Corp operates primarily in the US, trading under the ticker MSFT on the NASDAQ NMS - GLOBAL MARKET. As a dominant force in the Technology space, the company continues to innovate and drive progress within the industry.\\n\\nFrom 2023-11-05 to 2023-11-12, MSFT's stock price increased from 350.17 to 366.92. The stock price each day during this period is [353.874, 357.844, 360.494, 358.003, 366.916], and the corresponding daily return is given by [0.0106, 0.0112, 0.0074, -0.006900000000000001, 0.024900000000000002]. News during this period are listed below:\\n\\n[Topic]: Microsoft Corp. stock outperforms competitors on strong trading day\\n[Topic Duration]:2023-11-07 to 2023-11-12\\n[News Headline]:Microsoft Corp. stock outperforms competitors on strong trading day\\n[News Summary]: Shares of Microsoft Corp. rose 2.49% to $369.67 Friday, on what proved to be an all-around favorable trading session for the stock market, with the S&P 500...\\n[Topic Size]:2\\n\\n[Topic]: You'll Never Guess Who's Beating OpenAI in Generative Artificial Intelligence\\n[Topic Duration]:2023-11-07 to 2023-11-10\\n[News Headline]:You'll Never Guess Who's Beating OpenAI in Generative Artificial Intelligence\\n[News Summary]: OpenAI burst onto the scene about a year ago with the launch of ChatGPT, introducing the world to the power of generative AI.  It's since been seen as the leader in artificial intelligence, garnering a $10 billion investment from Microsoft (NASDAQ: MSFT).  While OpenAI's GPT-4 is seen as the standard bearer, and it recently released an update on that model, one company is making quick progress in the space, even exceeding GPT-4's performance in some applications.\\n[Topic Size]:2\\n\\n[Topic]: AvePoint, Inc. (NASDAQ:AVPT) Q3 2023 Earnings Call Transcript\\n[Topic Duration]:2023-11-07 to 2023-11-12\\n[News Headline]:AvePoint, Inc. (NASDAQ:AVPT) Q3 2023 Earnings Call Transcript\\n[News Summary]: AvePoint, Inc. (NASDAQ:AVPT) Q3 2023 Earnings Call Transcript November 9, 2023 Operator: Good day and welcome to the AvePoint Inc. Q3 2023 Earnings Call. Today, all participants will be in a listen-only mode. [Operator Instructions] After today’s presentation there will be an opportunity to ask questions. [Operator Instructions] Please note that today’s event is being […]\\n[Topic Size]:2\\n\\n[Topic]: 3 Top Tech Stocks to Buy in November\\n[Topic Duration]:2023-11-07 to 2023-11-09\\n[News Headline]:3 Top Tech Stocks to Buy in November\\n[News Summary]: These companies dominate their respective markets, and you won't want to miss out on their long-term potential.\\n[Topic Size]:6\\n\\n[Topic]: Tech Titans: 2 Top Artificial Intelligence (AI) Stocks to Watch Heading Into 2024\\n[Topic Duration]:2023-11-08 to 2023-11-12\\n[News Headline]:Tech Titans: 2 Top Artificial Intelligence (AI) Stocks to Watch Heading Into 2024\\n[News Summary]: Although the AI revolution was unofficially kicked off in late 2022 when ChatGPT rose to fame, 2023 saw many incredible releases by some of the biggest tech names in the industry.  The top two names I'm watching in this space in 2024 are Nvidia (NASDAQ: NVDA) and Microsoft (NASDAQ: MSFT).  Nvidia has been the undisputed king of the AI movement, as its graphics processing units (GPUs) are the bedrock for the computers that create AI models.\\n[Topic Size]:12\\n\\n[Topic]: Nvidia: Assuring Processor Dominance By Technology And Strategic Investments\\n[Topic Duration]:2023-11-09 to 2023-11-10\\n[News Headline]:Nvidia: Assuring Processor Dominance By Technology And Strategic Investments\\n[News Summary]: Nvidia faces competition from Intel and AMD in cloud AI market, but strategic investments and top technology ensure dominance. Learn more on NVDA stock here.\\n[Topic Size]:2\\n\\n[Basic Financials]:\\n\\nNo basic financial reported.\\n\\nBased on all the information before 2023-11-12 and the following guidelines, let's analyze the positive developments and potential concerns for AAPL. Apart from stock price information and basic financials, several news topcis are given and they are derived from news clustering. Each topic contains its duration, a key news headline and summary, and a topic size, indicating the number of related news items within that cluster. First, note that some of the factors only influence the price for the following 1 or 2 days while some others may have longer-term effects. Second, the topics that are closer to 2023-11-12 are likely to have a stronger influence on the upcoming stock price forecast. Third, take into account the topic size: larger clusters represent greater market attention and, consequently, likely exert more influence on the stock price. Following these instructions, please come up with 2-4 most important positive factors and 2-4 most significant negative effects and keep them concise. Most factors should be inferred from company related news.Then make your prediction of the MSFT price movement for next week (2023-11-12 to 2023-11-19). Provide a summary analysis to support your prediction.[/INST]\""],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":12}]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":335,"status":"ok","timestamp":1730838142431,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"OmVMmLfzKQ-2","outputId":"8f1a7316-6b68-4256-8dd8-2643548a8758"},"outputs":[{"output_type":"stream","name":"stdout","text":["The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n","Token is valid (permission: write).\n","Your token has been saved to /root/.cache/huggingface/token\n","Login successful\n"]}],"source":["from huggingface_hub import login\n","\n","# 输入你的 Hugging Face Access Token\n","login(token=\"Use your own key to make the call for the base model\")"]},{"cell_type":"markdown","source":["# 3.Executive training document"],"metadata":{"id":"AR6wkcg9Dgt_"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":12826876,"status":"ok","timestamp":1730851040185,"user":{"displayName":"Yuncong Liu","userId":"08340999060190968949"},"user_tz":300},"id":"sGY8yZvMLHHv","outputId":"56d8963c-9422-42eb-a626-509a7d973b36"},"outputs":[{"output_type":"stream","name":"stdout","text":["[2024-11-05 20:23:36,145] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n","[2024-11-05 20:23:42,089] [WARNING] [runner.py:215:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.\n","[2024-11-05 20:23:42,089] [INFO] [runner.py:607:main] cmd = /usr/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None train_lora.py --run_name llama3-8b-a100-5e-5lr --base_model llama3 --dataset /content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data/fingpt-forecaster-1105/train/ --test_dataset /content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data/fingpt-forecaster-1105/test/ --max_length 8000 --batch_size 2 --gradient_accumulation_steps 8 --learning_rate 5e-5 --num_epochs 5 --log_interval 10 --warmup_ratio 0.03 --scheduler constant --evaluation_strategy steps --ds_config config.json\n","[2024-11-05 20:23:43,889] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.2\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NCCL_VERSION=2.19.3-1\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_PACKAGE=libnccl2=2.19.3-1+cuda12.2\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NCCL_IGNORE_DISABLED_P2P=1\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_PACKAGE_NAME=libnccl2\n","[2024-11-05 20:23:47,226] [INFO] [launch.py:139:main] 0 NV_LIBNCCL_PACKAGE_VERSION=2.19.3-1\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:146:main] WORLD INFO DICT: {'localhost': [0]}\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:152:main] nnodes=1, num_local_procs=1, node_rank=0\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:163:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0]})\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:164:main] dist_world_size=1\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:168:main] Setting CUDA_VISIBLE_DEVICES=0\n","[2024-11-05 20:23:47,227] [INFO] [launch.py:256:main] process 3936 spawned with command: ['/usr/bin/python3', '-u', 'train_lora.py', '--local_rank=0', '--run_name', 'llama3-8b-a100-5e-5lr', '--base_model', 'llama3', '--dataset', '/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data/fingpt-forecaster-1105/train/', '--test_dataset', '/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data/fingpt-forecaster-1105/test/', '--max_length', '8000', '--batch_size', '2', '--gradient_accumulation_steps', '8', '--learning_rate', '5e-5', '--num_epochs', '5', '--log_interval', '10', '--warmup_ratio', '0.03', '--scheduler', 'constant', '--evaluation_strategy', 'steps', '--ds_config', 'config.json']\n","2024-11-05 20:23:51.582831: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n","2024-11-05 20:23:51.598708: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-11-05 20:23:51.620479: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-11-05 20:23:51.627300: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-11-05 20:23:51.643049: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-11-05 20:23:52.804146: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n","\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33myl5440\u001b[0m (\u001b[33myl5440-columbia-university\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n","config.json: 100% 654/654 [00:00<00:00, 4.53MB/s]\n","model.safetensors.index.json: 100% 23.9k/23.9k [00:00<00:00, 70.5MB/s]\n","Downloading shards:   0% 0/4 [00:00<?, ?it/s]\n","model-00001-of-00004.safetensors:   0% 0.00/4.98G [00:00<?, ?B/s]\u001b[A\n","model-00001-of-00004.safetensors:   0% 10.5M/4.98G [00:00<03:07, 26.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   0% 21.0M/4.98G [00:00<02:24, 34.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 31.5M/4.98G [00:00<02:23, 34.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 41.9M/4.98G [00:01<02:10, 37.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 52.4M/4.98G [00:01<02:04, 39.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 62.9M/4.98G [00:01<02:00, 40.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   1% 73.4M/4.98G [00:01<01:57, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   2% 83.9M/4.98G [00:02<01:55, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   2% 94.4M/4.98G [00:02<01:54, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   2% 105M/4.98G [00:02<01:53, 43.0MB/s] \u001b[A\n","model-00001-of-00004.safetensors:   2% 115M/4.98G [00:02<01:52, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 126M/4.98G [00:03<01:51, 43.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 136M/4.98G [00:03<01:51, 43.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 147M/4.98G [00:03<01:50, 43.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 157M/4.98G [00:03<01:51, 43.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   3% 168M/4.98G [00:04<01:50, 43.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 178M/4.98G [00:04<01:50, 43.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 189M/4.98G [00:04<01:49, 43.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 199M/4.98G [00:04<01:49, 43.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 210M/4.98G [00:05<01:49, 43.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   4% 220M/4.98G [00:05<01:49, 43.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 231M/4.98G [00:05<01:49, 43.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 241M/4.98G [00:05<01:49, 43.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 252M/4.98G [00:06<01:49, 43.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 262M/4.98G [00:06<01:49, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   5% 273M/4.98G [00:06<01:48, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 283M/4.98G [00:06<01:48, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 294M/4.98G [00:06<01:48, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 304M/4.98G [00:07<01:48, 43.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   6% 315M/4.98G [00:07<01:48, 42.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 325M/4.98G [00:07<01:48, 42.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 336M/4.98G [00:07<01:48, 42.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 346M/4.98G [00:08<01:47, 43.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 357M/4.98G [00:08<01:47, 43.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   7% 367M/4.98G [00:08<01:46, 43.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 377M/4.98G [00:08<01:50, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 388M/4.98G [00:09<01:48, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 398M/4.98G [00:09<01:47, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 409M/4.98G [00:09<01:47, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   8% 419M/4.98G [00:09<01:50, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 430M/4.98G [00:10<01:47, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 440M/4.98G [00:10<01:46, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 451M/4.98G [00:10<01:46, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 461M/4.98G [00:10<01:49, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:   9% 472M/4.98G [00:11<01:47, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 482M/4.98G [00:11<01:46, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 493M/4.98G [00:11<01:45, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 503M/4.98G [00:11<01:48, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  10% 514M/4.98G [00:12<01:46, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 524M/4.98G [00:12<01:45, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 535M/4.98G [00:12<01:44, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 545M/4.98G [00:12<01:47, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 556M/4.98G [00:13<01:45, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  11% 566M/4.98G [00:13<01:44, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 577M/4.98G [00:13<01:44, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 587M/4.98G [00:13<01:46, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 598M/4.98G [00:14<01:44, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 608M/4.98G [00:14<01:43, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  12% 619M/4.98G [00:14<01:42, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 629M/4.98G [00:14<01:45, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 640M/4.98G [00:15<01:43, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 650M/4.98G [00:15<01:42, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 661M/4.98G [00:15<01:41, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  13% 671M/4.98G [00:15<01:44, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 682M/4.98G [00:16<01:42, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 692M/4.98G [00:16<01:41, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 703M/4.98G [00:16<01:40, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  14% 713M/4.98G [00:16<01:43, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 724M/4.98G [00:17<01:41, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 734M/4.98G [00:17<01:40, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 744M/4.98G [00:17<01:39, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 755M/4.98G [00:17<01:42, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  15% 765M/4.98G [00:18<01:40, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 776M/4.98G [00:18<01:39, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 786M/4.98G [00:18<01:38, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 797M/4.98G [00:18<01:41, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 807M/4.98G [00:19<01:38, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  16% 818M/4.98G [00:19<01:37, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 828M/4.98G [00:19<01:37, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 839M/4.98G [00:19<01:40, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 849M/4.98G [00:20<01:38, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 860M/4.98G [00:20<01:37, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  17% 870M/4.98G [00:20<01:37, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  18% 881M/4.98G [00:20<01:39, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  18% 891M/4.98G [00:21<01:37, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  18% 902M/4.98G [00:21<01:36, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  18% 912M/4.98G [00:21<01:35, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 923M/4.98G [00:21<01:38, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 933M/4.98G [00:22<01:36, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 944M/4.98G [00:22<01:35, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 954M/4.98G [00:22<01:34, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  19% 965M/4.98G [00:22<01:37, 41.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 975M/4.98G [00:23<01:35, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 986M/4.98G [00:23<01:34, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 996M/4.98G [00:23<01:33, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 1.01G/4.98G [00:23<01:36, 41.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  20% 1.02G/4.98G [00:24<01:34, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.03G/4.98G [00:24<01:33, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.04G/4.98G [00:24<01:32, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.05G/4.98G [00:24<01:35, 41.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.06G/4.98G [00:25<01:33, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  21% 1.07G/4.98G [00:25<01:33, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.08G/4.98G [00:25<01:32, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.09G/4.98G [00:25<01:34, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.10G/4.98G [00:26<01:32, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  22% 1.11G/4.98G [00:26<01:31, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.12G/4.98G [00:26<01:30, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.13G/4.98G [00:26<01:33, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.14G/4.98G [00:27<01:31, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.15G/4.98G [00:27<01:30, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  23% 1.16G/4.98G [00:27<01:29, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.17G/4.98G [00:27<01:32, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.18G/4.98G [00:28<01:30, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.20G/4.98G [00:28<01:30, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.21G/4.98G [00:28<01:30, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  24% 1.22G/4.98G [00:28<01:30, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.23G/4.98G [00:29<01:29, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.24G/4.98G [00:29<01:28, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.25G/4.98G [00:29<01:27, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.26G/4.98G [00:29<01:29, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  25% 1.27G/4.98G [00:30<01:28, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.28G/4.98G [00:30<01:27, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.29G/4.98G [00:30<01:26, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.30G/4.98G [00:30<01:29, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  26% 1.31G/4.98G [00:31<01:27, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.32G/4.98G [00:31<01:26, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.33G/4.98G [00:31<01:25, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.34G/4.98G [00:31<01:28, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.35G/4.98G [00:32<01:26, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  27% 1.36G/4.98G [00:32<01:26, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.37G/4.98G [00:32<01:25, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.38G/4.98G [00:32<01:26, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.39G/4.98G [00:33<01:25, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.41G/4.98G [00:33<01:24, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  28% 1.42G/4.98G [00:33<01:23, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.43G/4.98G [00:33<01:26, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.44G/4.98G [00:34<01:24, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.45G/4.98G [00:34<01:23, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.46G/4.98G [00:34<01:22, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  29% 1.47G/4.98G [00:34<01:25, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.48G/4.98G [00:35<01:23, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.49G/4.98G [00:35<01:22, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.50G/4.98G [00:35<01:22, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  30% 1.51G/4.98G [00:35<01:23, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.52G/4.98G [00:36<01:22, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.53G/4.98G [00:36<01:21, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.54G/4.98G [00:36<01:20, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.55G/4.98G [00:36<01:23, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  31% 1.56G/4.98G [00:37<01:21, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.57G/4.98G [00:37<01:20, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.58G/4.98G [00:37<01:19, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.59G/4.98G [00:37<01:22, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.60G/4.98G [00:38<01:20, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  32% 1.61G/4.98G [00:38<01:19, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.63G/4.98G [00:38<01:19, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.64G/4.98G [00:38<01:20, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.65G/4.98G [00:39<01:19, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  33% 1.66G/4.98G [00:39<01:18, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.67G/4.98G [00:39<01:18, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.68G/4.98G [00:39<01:19, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.69G/4.98G [00:40<01:18, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.70G/4.98G [00:40<01:17, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  34% 1.71G/4.98G [00:40<01:17, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.72G/4.98G [00:40<01:18, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.73G/4.98G [00:41<01:17, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.74G/4.98G [00:41<01:16, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.75G/4.98G [00:41<01:15, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  35% 1.76G/4.98G [00:41<01:18, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.77G/4.98G [00:42<01:16, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.78G/4.98G [00:42<01:15, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.79G/4.98G [00:42<01:14, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.80G/4.98G [00:42<01:17, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  36% 1.81G/4.98G [00:43<01:15, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.82G/4.98G [00:43<01:14, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.84G/4.98G [00:43<01:14, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.85G/4.98G [00:43<01:15, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  37% 1.86G/4.98G [00:44<01:14, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.87G/4.98G [00:44<01:13, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.88G/4.98G [00:44<01:12, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.89G/4.98G [00:44<01:15, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.90G/4.98G [00:45<01:13, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  38% 1.91G/4.98G [00:45<01:12, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.92G/4.98G [00:45<01:12, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.93G/4.98G [00:45<01:14, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.94G/4.98G [00:46<01:12, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.95G/4.98G [00:46<01:11, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  39% 1.96G/4.98G [00:46<01:10, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 1.97G/4.98G [00:46<01:13, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 1.98G/4.98G [00:47<01:11, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 1.99G/4.98G [00:47<01:10, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 2.00G/4.98G [00:47<01:10, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  40% 2.01G/4.98G [00:47<01:11, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.02G/4.98G [00:48<01:10, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.03G/4.98G [00:48<01:09, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.04G/4.98G [00:48<01:08, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  41% 2.06G/4.98G [00:48<01:11, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.07G/4.98G [00:49<01:09, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.08G/4.98G [00:49<01:08, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.09G/4.98G [00:49<01:08, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.10G/4.98G [00:49<01:10, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  42% 2.11G/4.98G [00:50<01:08, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.12G/4.98G [00:50<01:07, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.13G/4.98G [00:50<01:07, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.14G/4.98G [00:50<01:08, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.15G/4.98G [00:51<01:07, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  43% 2.16G/4.98G [00:51<01:06, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.17G/4.98G [00:51<01:05, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.18G/4.98G [00:51<01:07, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.19G/4.98G [00:52<01:06, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.20G/4.98G [00:52<01:05, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  44% 2.21G/4.98G [00:52<01:05, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.22G/4.98G [00:52<01:06, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.23G/4.98G [00:53<01:05, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.24G/4.98G [00:53<01:04, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  45% 2.25G/4.98G [00:53<01:04, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.26G/4.98G [00:53<01:05, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.28G/4.98G [00:54<01:04, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.29G/4.98G [00:54<01:03, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.30G/4.98G [00:54<01:03, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  46% 2.31G/4.98G [00:54<01:04, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.32G/4.98G [00:55<01:03, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.33G/4.98G [00:55<01:02, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.34G/4.98G [00:55<01:02, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.35G/4.98G [00:55<01:03, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  47% 2.36G/4.98G [00:56<01:02, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.37G/4.98G [00:56<01:01, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.38G/4.98G [00:56<01:01, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.39G/4.98G [00:56<01:02, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.40G/4.98G [00:57<01:01, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  48% 2.41G/4.98G [00:57<01:01, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.42G/4.98G [00:57<01:00, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.43G/4.98G [00:57<01:01, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.44G/4.98G [00:58<01:00, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  49% 2.45G/4.98G [00:58<00:59, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.46G/4.98G [00:58<00:59, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.47G/4.98G [00:58<01:00, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.49G/4.98G [00:59<00:59, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.50G/4.98G [00:59<00:59, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  50% 2.51G/4.98G [00:59<00:59, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.52G/4.98G [00:59<00:59, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.53G/4.98G [01:00<00:58, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.54G/4.98G [01:00<00:57, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.55G/4.98G [01:00<00:57, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  51% 2.56G/4.98G [01:00<00:58, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.57G/4.98G [01:01<00:56, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.58G/4.98G [01:01<00:56, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.59G/4.98G [01:01<00:56, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.60G/4.98G [01:01<00:57, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  52% 2.61G/4.98G [01:02<00:56, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.62G/4.98G [01:02<00:55, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.63G/4.98G [01:02<00:55, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.64G/4.98G [01:02<00:56, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  53% 2.65G/4.98G [01:03<00:55, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.66G/4.98G [01:03<00:55, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.67G/4.98G [01:03<00:54, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.68G/4.98G [01:03<00:55, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.69G/4.98G [01:04<00:54, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  54% 2.71G/4.98G [01:04<00:53, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.72G/4.98G [01:04<00:52, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.73G/4.98G [01:04<00:54, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.74G/4.98G [01:05<00:53, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.75G/4.98G [01:05<00:53, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  55% 2.76G/4.98G [01:05<00:52, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.77G/4.98G [01:05<00:53, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.78G/4.98G [01:06<00:51, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.79G/4.98G [01:06<00:51, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.80G/4.98G [01:06<00:51, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  56% 2.81G/4.98G [01:06<00:52, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.82G/4.98G [01:07<00:51, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.83G/4.98G [01:07<00:51, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.84G/4.98G [01:07<00:50, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  57% 2.85G/4.98G [01:07<00:51, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.86G/4.98G [01:08<00:50, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.87G/4.98G [01:08<00:50, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.88G/4.98G [01:08<00:49, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.89G/4.98G [01:08<00:50, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  58% 2.90G/4.98G [01:09<00:49, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.92G/4.98G [01:09<00:49, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.93G/4.98G [01:09<00:48, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.94G/4.98G [01:09<00:49, 41.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.95G/4.98G [01:10<00:48, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  59% 2.96G/4.98G [01:10<00:47, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.97G/4.98G [01:10<00:47, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.98G/4.98G [01:10<00:48, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 2.99G/4.98G [01:11<00:47, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 3.00G/4.98G [01:11<00:46, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  60% 3.01G/4.98G [01:11<00:46, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.02G/4.98G [01:11<00:47, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.03G/4.98G [01:12<00:46, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.04G/4.98G [01:12<00:46, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  61% 3.05G/4.98G [01:12<00:45, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.06G/4.98G [01:12<00:46, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.07G/4.98G [01:13<00:45, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.08G/4.98G [01:13<00:44, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.09G/4.98G [01:13<00:44, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  62% 3.10G/4.98G [01:13<00:45, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.11G/4.98G [01:14<00:44, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.12G/4.98G [01:14<00:43, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.14G/4.98G [01:14<00:43, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.15G/4.98G [01:14<00:44, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  63% 3.16G/4.98G [01:15<00:43, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.17G/4.98G [01:15<00:43, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.18G/4.98G [01:15<00:42, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.19G/4.98G [01:15<00:43, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.20G/4.98G [01:16<00:42, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  64% 3.21G/4.98G [01:16<00:41, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.22G/4.98G [01:16<00:41, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.23G/4.98G [01:16<00:42, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.24G/4.98G [01:17<00:41, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  65% 3.25G/4.98G [01:17<00:40, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.26G/4.98G [01:17<00:40, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.27G/4.98G [01:17<00:41, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.28G/4.98G [01:18<00:40, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.29G/4.98G [01:18<00:39, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  66% 3.30G/4.98G [01:18<00:39, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.31G/4.98G [01:18<00:40, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.32G/4.98G [01:19<00:39, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.33G/4.98G [01:19<00:38, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.34G/4.98G [01:19<00:38, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  67% 3.36G/4.98G [01:19<00:39, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.37G/4.98G [01:20<00:38, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.38G/4.98G [01:20<00:38, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.39G/4.98G [01:20<00:37, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.40G/4.98G [01:20<00:38, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  68% 3.41G/4.98G [01:21<00:37, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.42G/4.98G [01:21<00:36, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.43G/4.98G [01:21<00:36, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.44G/4.98G [01:21<00:37, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  69% 3.45G/4.98G [01:22<00:36, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.46G/4.98G [01:22<00:35, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.47G/4.98G [01:22<00:35, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.48G/4.98G [01:22<00:36, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.49G/4.98G [01:23<00:35, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  70% 3.50G/4.98G [01:23<00:35, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.51G/4.98G [01:23<00:34, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.52G/4.98G [01:23<00:35, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.53G/4.98G [01:24<00:34, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.54G/4.98G [01:24<00:33, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  71% 3.55G/4.98G [01:24<00:33, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.57G/4.98G [01:24<00:34, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.58G/4.98G [01:25<00:33, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.59G/4.98G [01:25<00:33, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.60G/4.98G [01:25<00:32, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  72% 3.61G/4.98G [01:25<00:33, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  73% 3.62G/4.98G [01:26<00:32, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  73% 3.63G/4.98G [01:26<00:32, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  73% 3.64G/4.98G [01:26<00:31, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  73% 3.65G/4.98G [01:26<00:32, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.66G/4.98G [01:27<00:31, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.67G/4.98G [01:27<00:30, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.68G/4.98G [01:27<00:30, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.69G/4.98G [01:27<00:31, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  74% 3.70G/4.98G [01:28<00:30, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.71G/4.98G [01:28<00:30, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.72G/4.98G [01:28<00:29, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.73G/4.98G [01:28<00:30, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.74G/4.98G [01:29<00:29, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  75% 3.75G/4.98G [01:29<00:29, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.76G/4.98G [01:29<00:28, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.77G/4.98G [01:29<00:29, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.79G/4.98G [01:30<00:28, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.80G/4.98G [01:30<00:28, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  76% 3.81G/4.98G [01:30<00:27, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.82G/4.98G [01:30<00:28, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.83G/4.98G [01:31<00:27, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.84G/4.98G [01:31<00:26, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  77% 3.85G/4.98G [01:31<00:26, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.86G/4.98G [01:31<00:27, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.87G/4.98G [01:32<00:26, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.88G/4.98G [01:32<00:25, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.89G/4.98G [01:32<00:25, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  78% 3.90G/4.98G [01:32<00:26, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.91G/4.98G [01:33<00:25, 41.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.92G/4.98G [01:33<00:25, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.93G/4.98G [01:33<00:24, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.94G/4.98G [01:33<00:24, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  79% 3.95G/4.98G [01:34<00:24, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 3.96G/4.98G [01:34<00:23, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 3.97G/4.98G [01:34<00:23, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 3.98G/4.98G [01:34<00:24, 41.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 4.00G/4.98G [01:35<00:23, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  80% 4.01G/4.98G [01:35<00:23, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  81% 4.02G/4.98G [01:35<00:22, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  81% 4.03G/4.98G [01:35<00:22, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  81% 4.04G/4.98G [01:36<00:22, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  81% 4.05G/4.98G [01:36<00:22, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.06G/4.98G [01:36<00:21, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.07G/4.98G [01:36<00:21, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.08G/4.98G [01:37<00:21, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.09G/4.98G [01:37<00:21, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  82% 4.10G/4.98G [01:37<00:20, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.11G/4.98G [01:37<00:20, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.12G/4.98G [01:38<00:20, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.13G/4.98G [01:38<00:20, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.14G/4.98G [01:38<00:19, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  83% 4.15G/4.98G [01:38<00:20, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.16G/4.98G [01:39<00:19, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.17G/4.98G [01:39<00:19, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.18G/4.98G [01:39<00:18, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.19G/4.98G [01:39<00:18, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  84% 4.20G/4.98G [01:40<00:18, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.22G/4.98G [01:40<00:18, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.23G/4.98G [01:40<00:17, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.24G/4.98G [01:40<00:17, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  85% 4.25G/4.98G [01:41<00:17, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.26G/4.98G [01:41<00:17, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.27G/4.98G [01:41<00:16, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.28G/4.98G [01:41<00:16, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.29G/4.98G [01:42<00:16, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  86% 4.30G/4.98G [01:42<00:16, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.31G/4.98G [01:42<00:15, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.32G/4.98G [01:42<00:15, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.33G/4.98G [01:43<00:15, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.34G/4.98G [01:43<00:15, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  87% 4.35G/4.98G [01:43<00:14, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.36G/4.98G [01:43<00:14, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.37G/4.98G [01:44<00:14, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.38G/4.98G [01:44<00:14, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.39G/4.98G [01:44<00:13, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  88% 4.40G/4.98G [01:44<00:13, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.41G/4.98G [01:45<00:13, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.42G/4.98G [01:45<00:13, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.44G/4.98G [01:45<00:12, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  89% 4.45G/4.98G [01:45<00:12, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.46G/4.98G [01:46<00:12, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.47G/4.98G [01:46<00:12, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.48G/4.98G [01:46<00:11, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.49G/4.98G [01:46<00:11, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  90% 4.50G/4.98G [01:47<00:12, 36.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.51G/4.98G [01:47<00:12, 38.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.52G/4.98G [01:47<00:11, 39.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.53G/4.98G [01:48<00:11, 40.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.54G/4.98G [01:48<00:10, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  91% 4.55G/4.98G [01:48<00:10, 41.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.56G/4.98G [01:48<00:09, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.57G/4.98G [01:49<00:09, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.58G/4.98G [01:49<00:09, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.59G/4.98G [01:49<00:09, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  92% 4.60G/4.98G [01:49<00:08, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  93% 4.61G/4.98G [01:50<00:08, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  93% 4.62G/4.98G [01:50<00:08, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  93% 4.63G/4.98G [01:50<00:08, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  93% 4.65G/4.98G [01:50<00:07, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.66G/4.98G [01:50<00:07, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.67G/4.98G [01:51<00:07, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.68G/4.98G [01:51<00:06, 43.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.69G/4.98G [01:51<00:06, 42.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  94% 4.70G/4.98G [01:51<00:06, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.71G/4.98G [01:52<00:06, 42.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.72G/4.98G [01:52<00:06, 42.7MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.73G/4.98G [01:52<00:05, 42.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.74G/4.98G [01:52<00:05, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  95% 4.75G/4.98G [01:53<00:05, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.76G/4.98G [01:53<00:05, 42.8MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.77G/4.98G [01:53<00:04, 42.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.78G/4.98G [01:53<00:04, 41.6MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.79G/4.98G [01:54<00:04, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  96% 4.80G/4.98G [01:54<00:04, 42.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.81G/4.98G [01:54<00:03, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.82G/4.98G [01:54<00:03, 41.4MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.83G/4.98G [01:55<00:03, 42.1MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  97% 4.84G/4.98G [01:55<00:03, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.85G/4.98G [01:55<00:02, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.87G/4.98G [01:55<00:02, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.88G/4.98G [01:56<00:02, 41.9MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.89G/4.98G [01:56<00:02, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  98% 4.90G/4.98G [01:56<00:01, 42.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.91G/4.98G [01:56<00:01, 41.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.92G/4.98G [01:57<00:01, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.93G/4.98G [01:57<00:01, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.94G/4.98G [01:57<00:00, 42.5MB/s]\u001b[A\n","model-00001-of-00004.safetensors:  99% 4.95G/4.98G [01:57<00:00, 41.2MB/s]\u001b[A\n","model-00001-of-00004.safetensors: 100% 4.96G/4.98G [01:58<00:00, 42.0MB/s]\u001b[A\n","model-00001-of-00004.safetensors: 100% 4.97G/4.98G [01:58<00:00, 42.3MB/s]\u001b[A\n","model-00001-of-00004.safetensors: 100% 4.98G/4.98G [01:58<00:00, 42.0MB/s]\n","Downloading shards:  25% 1/4 [01:59<05:57, 119.08s/it]\n","model-00002-of-00004.safetensors:   0% 0.00/5.00G [00:00<?, ?B/s]\u001b[A\n","model-00002-of-00004.safetensors:   0% 10.5M/5.00G [00:00<01:58, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   0% 21.0M/5.00G [00:00<01:58, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 31.5M/5.00G [00:00<01:56, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 41.9M/5.00G [00:00<01:56, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 52.4M/5.00G [00:01<01:56, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 62.9M/5.00G [00:01<01:56, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   1% 73.4M/5.00G [00:01<01:55, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   2% 83.9M/5.00G [00:01<01:57, 41.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   2% 94.4M/5.00G [00:02<01:57, 41.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   2% 105M/5.00G [00:02<01:56, 42.0MB/s] \u001b[A\n","model-00002-of-00004.safetensors:   2% 115M/5.00G [00:02<01:55, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 126M/5.00G [00:02<01:55, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 136M/5.00G [00:03<01:54, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 147M/5.00G [00:03<01:54, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 157M/5.00G [00:03<01:53, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   3% 168M/5.00G [00:03<01:52, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 178M/5.00G [00:04<01:53, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 189M/5.00G [00:04<01:53, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 199M/5.00G [00:04<01:52, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 210M/5.00G [00:04<01:52, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   4% 220M/5.00G [00:05<01:51, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 231M/5.00G [00:05<01:52, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 241M/5.00G [00:05<01:51, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 252M/5.00G [00:05<01:51, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 262M/5.00G [00:06<01:51, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   5% 273M/5.00G [00:06<01:51, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 283M/5.00G [00:06<01:50, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 294M/5.00G [00:06<01:50, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 304M/5.00G [00:07<01:50, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   6% 315M/5.00G [00:07<01:50, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 325M/5.00G [00:07<01:50, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 336M/5.00G [00:07<01:49, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 346M/5.00G [00:08<01:49, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 357M/5.00G [00:08<01:49, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   7% 367M/5.00G [00:08<01:49, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 377M/5.00G [00:08<01:49, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 388M/5.00G [00:09<01:48, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 398M/5.00G [00:09<01:48, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 409M/5.00G [00:09<01:47, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   8% 419M/5.00G [00:10<02:13, 34.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 430M/5.00G [00:10<02:12, 34.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 440M/5.00G [00:10<02:04, 36.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 451M/5.00G [00:10<01:58, 38.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 461M/5.00G [00:11<01:54, 39.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:   9% 472M/5.00G [00:11<01:51, 40.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 482M/5.00G [00:11<01:49, 41.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 493M/5.00G [00:11<01:48, 41.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 503M/5.00G [00:12<01:47, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 514M/5.00G [00:12<01:46, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  10% 524M/5.00G [00:12<01:46, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 535M/5.00G [00:12<01:45, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 545M/5.00G [00:13<01:45, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 556M/5.00G [00:13<01:45, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  11% 566M/5.00G [00:13<01:44, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 577M/5.00G [00:13<01:44, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 587M/5.00G [00:14<01:44, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 598M/5.00G [00:14<01:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 608M/5.00G [00:14<01:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  12% 619M/5.00G [00:14<01:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 629M/5.00G [00:15<01:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 640M/5.00G [00:15<01:42, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 650M/5.00G [00:15<01:42, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 661M/5.00G [00:15<01:41, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  13% 671M/5.00G [00:16<01:41, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 682M/5.00G [00:16<01:41, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 692M/5.00G [00:16<01:40, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 703M/5.00G [00:16<01:41, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 713M/5.00G [00:17<01:40, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  14% 724M/5.00G [00:17<01:40, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 734M/5.00G [00:17<01:39, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 744M/5.00G [00:17<01:39, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 755M/5.00G [00:18<01:39, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  15% 765M/5.00G [00:18<01:39, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 776M/5.00G [00:18<01:38, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 786M/5.00G [00:18<01:38, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 797M/5.00G [00:18<01:37, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 807M/5.00G [00:19<01:38, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  16% 818M/5.00G [00:19<01:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 828M/5.00G [00:19<01:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 839M/5.00G [00:19<01:37, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 849M/5.00G [00:20<01:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 860M/5.00G [00:20<02:04, 33.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  17% 870M/5.00G [00:20<01:55, 35.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 881M/5.00G [00:21<01:49, 37.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 891M/5.00G [00:21<01:44, 39.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 902M/5.00G [00:21<01:41, 40.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 912M/5.00G [00:21<01:39, 41.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  18% 923M/5.00G [00:22<01:37, 41.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  19% 933M/5.00G [00:22<01:36, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  19% 944M/5.00G [00:22<01:36, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  19% 954M/5.00G [00:22<01:35, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  19% 965M/5.00G [00:23<01:34, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 975M/5.00G [00:23<01:34, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 986M/5.00G [00:23<01:33, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 996M/5.00G [00:23<01:33, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 1.01G/5.00G [00:24<01:33, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  20% 1.02G/5.00G [00:24<01:33, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.03G/5.00G [00:24<01:33, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.04G/5.00G [00:24<01:32, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.05G/5.00G [00:25<01:32, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.06G/5.00G [00:25<01:32, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  21% 1.07G/5.00G [00:25<01:32, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.08G/5.00G [00:25<01:31, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.09G/5.00G [00:26<01:31, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.10G/5.00G [00:26<01:31, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.11G/5.00G [00:26<01:30, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  22% 1.12G/5.00G [00:26<01:30, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.13G/5.00G [00:27<01:30, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.14G/5.00G [00:27<01:29, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.15G/5.00G [00:27<01:29, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.16G/5.00G [00:27<01:29, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  23% 1.17G/5.00G [00:28<01:29, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  24% 1.18G/5.00G [00:28<01:28, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  24% 1.20G/5.00G [00:28<01:28, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  24% 1.21G/5.00G [00:28<01:28, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  24% 1.22G/5.00G [00:29<01:28, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.23G/5.00G [00:29<01:28, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.24G/5.00G [00:29<01:28, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.25G/5.00G [00:29<01:28, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.26G/5.00G [00:29<01:27, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  25% 1.27G/5.00G [00:30<01:27, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.28G/5.00G [00:30<01:27, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.29G/5.00G [00:30<01:26, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.30G/5.00G [00:31<01:53, 32.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.31G/5.00G [00:31<01:45, 35.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  26% 1.32G/5.00G [00:31<01:39, 37.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.33G/5.00G [00:31<01:35, 38.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.34G/5.00G [00:32<01:33, 39.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.35G/5.00G [00:32<01:30, 40.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.36G/5.00G [00:32<01:28, 41.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  27% 1.37G/5.00G [00:32<01:27, 41.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.38G/5.00G [00:33<01:26, 41.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.39G/5.00G [00:33<01:25, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.41G/5.00G [00:33<01:24, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  28% 1.42G/5.00G [00:33<01:24, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.43G/5.00G [00:34<01:23, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.44G/5.00G [00:34<01:23, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.45G/5.00G [00:34<01:22, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.46G/5.00G [00:34<01:22, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  29% 1.47G/5.00G [00:35<01:22, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.48G/5.00G [00:35<01:22, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.49G/5.00G [00:35<01:22, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.50G/5.00G [00:35<01:22, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.51G/5.00G [00:36<01:22, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  30% 1.52G/5.00G [00:36<01:22, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.53G/5.00G [00:36<01:21, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.54G/5.00G [00:36<01:21, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.55G/5.00G [00:37<01:20, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.56G/5.00G [00:37<01:20, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  31% 1.57G/5.00G [00:37<01:20, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.58G/5.00G [00:37<01:20, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.59G/5.00G [00:38<01:19, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.60G/5.00G [00:38<01:19, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  32% 1.61G/5.00G [00:38<01:18, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.63G/5.00G [00:38<01:18, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.64G/5.00G [00:39<01:18, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.65G/5.00G [00:39<01:18, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.66G/5.00G [00:39<01:18, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  33% 1.67G/5.00G [00:39<01:18, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.68G/5.00G [00:40<01:18, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.69G/5.00G [00:40<01:18, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.70G/5.00G [00:40<01:18, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.71G/5.00G [00:40<01:17, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  34% 1.72G/5.00G [00:41<01:17, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.73G/5.00G [00:41<01:31, 35.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.74G/5.00G [00:41<01:26, 37.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.75G/5.00G [00:41<01:23, 39.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.76G/5.00G [00:42<01:20, 40.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  35% 1.77G/5.00G [00:42<01:19, 40.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.78G/5.00G [00:42<01:18, 41.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.79G/5.00G [00:42<01:16, 41.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.80G/5.00G [00:43<01:16, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.81G/5.00G [00:43<01:15, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  36% 1.82G/5.00G [00:43<01:14, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.84G/5.00G [00:43<01:14, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.85G/5.00G [00:44<01:13, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.86G/5.00G [00:44<01:13, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  37% 1.87G/5.00G [00:44<01:13, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.88G/5.00G [00:44<01:13, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.89G/5.00G [00:45<01:12, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.90G/5.00G [00:45<01:13, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.91G/5.00G [00:45<01:12, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  38% 1.92G/5.00G [00:45<01:12, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.93G/5.00G [00:46<01:11, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.94G/5.00G [00:46<01:11, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.95G/5.00G [00:46<01:11, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.96G/5.00G [00:46<01:11, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  39% 1.97G/5.00G [00:47<01:10, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 1.98G/5.00G [00:47<01:10, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 1.99G/5.00G [00:47<01:09, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 2.00G/5.00G [00:47<01:09, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 2.01G/5.00G [00:48<01:09, 43.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  40% 2.02G/5.00G [00:48<01:09, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.03G/5.00G [00:48<01:09, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.04G/5.00G [00:48<01:09, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.06G/5.00G [00:49<01:08, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  41% 2.07G/5.00G [00:49<01:09, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.08G/5.00G [00:49<01:08, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.09G/5.00G [00:49<01:08, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.10G/5.00G [00:50<01:07, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.11G/5.00G [00:50<01:07, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  42% 2.12G/5.00G [00:50<01:07, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.13G/5.00G [00:50<01:07, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.14G/5.00G [00:51<01:06, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.15G/5.00G [00:51<01:06, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.16G/5.00G [00:51<01:18, 36.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  43% 2.17G/5.00G [00:51<01:14, 37.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.18G/5.00G [00:52<01:12, 38.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.19G/5.00G [00:52<01:10, 40.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.20G/5.00G [00:52<01:08, 40.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.21G/5.00G [00:52<01:07, 41.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  44% 2.22G/5.00G [00:53<01:06, 41.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.23G/5.00G [00:53<01:05, 41.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.24G/5.00G [00:53<01:05, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.25G/5.00G [00:53<01:05, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  45% 2.26G/5.00G [00:54<01:04, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.28G/5.00G [00:54<01:07, 40.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.29G/5.00G [00:54<01:02, 43.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.30G/5.00G [00:54<01:02, 43.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.31G/5.00G [00:55<01:02, 43.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  46% 2.32G/5.00G [00:55<01:02, 43.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.33G/5.00G [00:55<01:02, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.34G/5.00G [00:55<01:01, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.35G/5.00G [00:56<01:01, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.36G/5.00G [00:56<01:01, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  47% 2.37G/5.00G [00:56<01:01, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.38G/5.00G [00:56<01:01, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.39G/5.00G [00:57<01:00, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.40G/5.00G [00:57<01:00, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.41G/5.00G [00:57<01:00, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  48% 2.42G/5.00G [00:57<01:00, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.43G/5.00G [00:58<01:00, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.44G/5.00G [00:58<01:00, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.45G/5.00G [00:58<00:59, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.46G/5.00G [00:58<00:59, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  49% 2.47G/5.00G [00:59<00:59, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.49G/5.00G [00:59<00:58, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.50G/5.00G [00:59<00:58, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.51G/5.00G [00:59<00:58, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  50% 2.52G/5.00G [01:00<00:58, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.53G/5.00G [01:00<00:57, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.54G/5.00G [01:00<00:57, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.55G/5.00G [01:00<00:57, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.56G/5.00G [01:01<00:57, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  51% 2.57G/5.00G [01:01<00:56, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.58G/5.00G [01:01<00:56, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.59G/5.00G [01:01<00:56, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.60G/5.00G [01:02<00:57, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.61G/5.00G [01:02<01:03, 37.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  52% 2.62G/5.00G [01:02<01:00, 39.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.63G/5.00G [01:02<00:59, 40.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.64G/5.00G [01:03<00:57, 40.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.65G/5.00G [01:03<00:56, 41.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.66G/5.00G [01:03<00:55, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  53% 2.67G/5.00G [01:03<00:55, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.68G/5.00G [01:04<00:54, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.69G/5.00G [01:04<00:54, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.71G/5.00G [01:04<00:54, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  54% 2.72G/5.00G [01:04<00:53, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.73G/5.00G [01:05<00:53, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.74G/5.00G [01:05<00:53, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.75G/5.00G [01:05<00:53, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.76G/5.00G [01:05<00:52, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  55% 2.77G/5.00G [01:06<00:52, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.78G/5.00G [01:06<00:51, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.79G/5.00G [01:06<00:51, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.80G/5.00G [01:06<00:51, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.81G/5.00G [01:07<00:51, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  56% 2.82G/5.00G [01:07<00:51, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.83G/5.00G [01:07<00:50, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.84G/5.00G [01:07<00:50, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.85G/5.00G [01:08<00:50, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.86G/5.00G [01:08<00:49, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  57% 2.87G/5.00G [01:08<00:49, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.88G/5.00G [01:08<00:49, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.89G/5.00G [01:09<00:49, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.90G/5.00G [01:09<00:49, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  58% 2.92G/5.00G [01:09<00:48, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.93G/5.00G [01:09<00:48, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.94G/5.00G [01:09<00:48, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.95G/5.00G [01:10<00:48, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.96G/5.00G [01:10<00:47, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  59% 2.97G/5.00G [01:10<00:47, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 2.98G/5.00G [01:10<00:47, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 2.99G/5.00G [01:11<00:47, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.00G/5.00G [01:11<00:47, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.01G/5.00G [01:11<00:46, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  60% 3.02G/5.00G [01:11<00:46, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.03G/5.00G [01:12<00:46, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.04G/5.00G [01:12<01:02, 31.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.05G/5.00G [01:12<00:57, 34.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.06G/5.00G [01:13<00:53, 36.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  61% 3.07G/5.00G [01:13<00:50, 37.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.08G/5.00G [01:13<00:48, 39.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.09G/5.00G [01:13<00:47, 40.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.10G/5.00G [01:14<00:46, 41.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.11G/5.00G [01:14<00:45, 41.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  62% 3.12G/5.00G [01:14<00:44, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.14G/5.00G [01:14<00:44, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.15G/5.00G [01:15<00:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.16G/5.00G [01:15<00:43, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  63% 3.17G/5.00G [01:15<00:43, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.18G/5.00G [01:15<00:42, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.19G/5.00G [01:16<00:42, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.20G/5.00G [01:16<00:42, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.21G/5.00G [01:16<00:42, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  64% 3.22G/5.00G [01:16<00:41, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.23G/5.00G [01:17<00:41, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.24G/5.00G [01:17<00:41, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.25G/5.00G [01:17<00:40, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.26G/5.00G [01:17<00:40, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  65% 3.27G/5.00G [01:18<00:40, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.28G/5.00G [01:18<00:40, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.29G/5.00G [01:18<00:40, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.30G/5.00G [01:18<00:39, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.31G/5.00G [01:19<00:39, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  66% 3.32G/5.00G [01:19<00:39, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.33G/5.00G [01:19<00:38, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.34G/5.00G [01:19<00:38, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.36G/5.00G [01:20<00:38, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  67% 3.37G/5.00G [01:20<00:38, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.38G/5.00G [01:20<00:37, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.39G/5.00G [01:20<00:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.40G/5.00G [01:21<00:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.41G/5.00G [01:21<00:37, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  68% 3.42G/5.00G [01:21<00:37, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.43G/5.00G [01:21<00:36, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.44G/5.00G [01:22<00:36, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.45G/5.00G [01:22<00:36, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.46G/5.00G [01:22<00:45, 33.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  69% 3.47G/5.00G [01:23<00:42, 36.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.48G/5.00G [01:23<00:40, 37.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.49G/5.00G [01:23<00:38, 39.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.50G/5.00G [01:23<00:37, 40.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.51G/5.00G [01:24<00:36, 40.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  70% 3.52G/5.00G [01:24<00:35, 41.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.53G/5.00G [01:24<00:35, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.54G/5.00G [01:24<00:34, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.55G/5.00G [01:24<00:34, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  71% 3.57G/5.00G [01:25<00:33, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.58G/5.00G [01:25<00:33, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.59G/5.00G [01:25<00:33, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.60G/5.00G [01:25<00:32, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.61G/5.00G [01:26<00:32, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  72% 3.62G/5.00G [01:26<00:32, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.63G/5.00G [01:26<00:32, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.64G/5.00G [01:26<00:31, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.65G/5.00G [01:27<00:31, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.66G/5.00G [01:27<00:31, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  73% 3.67G/5.00G [01:27<00:31, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.68G/5.00G [01:27<00:30, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.69G/5.00G [01:28<00:30, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.70G/5.00G [01:28<00:30, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.71G/5.00G [01:28<00:30, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  74% 3.72G/5.00G [01:28<00:29, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.73G/5.00G [01:29<00:29, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.74G/5.00G [01:29<00:29, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.75G/5.00G [01:29<00:29, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  75% 3.76G/5.00G [01:29<00:28, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.77G/5.00G [01:30<00:28, 43.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.79G/5.00G [01:30<00:28, 43.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.80G/5.00G [01:30<00:28, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.81G/5.00G [01:30<00:27, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  76% 3.82G/5.00G [01:31<00:27, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.83G/5.00G [01:31<00:27, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.84G/5.00G [01:31<00:27, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.85G/5.00G [01:31<00:26, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.86G/5.00G [01:32<00:26, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  77% 3.87G/5.00G [01:32<00:26, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.88G/5.00G [01:32<00:26, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.89G/5.00G [01:32<00:25, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.90G/5.00G [01:33<00:32, 33.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.91G/5.00G [01:33<00:32, 34.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  78% 3.92G/5.00G [01:33<00:29, 36.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.93G/5.00G [01:34<00:28, 37.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.94G/5.00G [01:34<00:27, 39.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.95G/5.00G [01:34<00:26, 40.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.96G/5.00G [01:34<00:25, 40.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  79% 3.97G/5.00G [01:35<00:24, 41.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 3.98G/5.00G [01:35<00:24, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 4.00G/5.00G [01:35<00:23, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 4.01G/5.00G [01:35<00:23, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  80% 4.02G/5.00G [01:36<00:23, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.03G/5.00G [01:36<00:22, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.04G/5.00G [01:36<00:22, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.05G/5.00G [01:36<00:22, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.06G/5.00G [01:37<00:21, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  81% 4.07G/5.00G [01:37<00:21, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.08G/5.00G [01:37<00:21, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.09G/5.00G [01:37<00:21, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.10G/5.00G [01:38<00:21, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.11G/5.00G [01:38<00:20, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  82% 4.12G/5.00G [01:38<00:20, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.13G/5.00G [01:38<00:20, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.14G/5.00G [01:38<00:20, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.15G/5.00G [01:39<00:19, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.16G/5.00G [01:39<00:19, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  83% 4.17G/5.00G [01:39<00:19, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.18G/5.00G [01:39<00:19, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.19G/5.00G [01:40<00:18, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.20G/5.00G [01:40<00:18, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  84% 4.22G/5.00G [01:40<00:18, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.23G/5.00G [01:41<00:23, 33.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.24G/5.00G [01:41<00:21, 35.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.25G/5.00G [01:41<00:20, 37.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.26G/5.00G [01:41<00:19, 38.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  85% 4.27G/5.00G [01:42<00:18, 39.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.28G/5.00G [01:42<00:17, 40.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.29G/5.00G [01:42<00:17, 41.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.30G/5.00G [01:42<00:16, 41.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.31G/5.00G [01:43<00:16, 42.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  86% 4.32G/5.00G [01:43<00:16, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.33G/5.00G [01:43<00:21, 31.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.34G/5.00G [01:44<00:19, 34.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.35G/5.00G [01:44<00:17, 36.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.36G/5.00G [01:44<00:16, 38.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  87% 4.37G/5.00G [01:44<00:15, 39.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.38G/5.00G [01:45<00:15, 40.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.39G/5.00G [01:45<00:14, 41.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.40G/5.00G [01:45<00:14, 41.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  88% 4.41G/5.00G [01:45<00:14, 41.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.42G/5.00G [01:46<00:13, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.44G/5.00G [01:46<00:13, 42.3MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.45G/5.00G [01:46<00:13, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.46G/5.00G [01:46<00:12, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  89% 4.47G/5.00G [01:47<00:12, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.48G/5.00G [01:47<00:12, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.49G/5.00G [01:47<00:11, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.50G/5.00G [01:47<00:11, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.51G/5.00G [01:48<00:11, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  90% 4.52G/5.00G [01:48<00:11, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.53G/5.00G [01:48<00:11, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.54G/5.00G [01:48<00:10, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.55G/5.00G [01:49<00:10, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.56G/5.00G [01:49<00:10, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  91% 4.57G/5.00G [01:49<00:09, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.58G/5.00G [01:49<00:09, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.59G/5.00G [01:50<00:09, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.60G/5.00G [01:50<00:09, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.61G/5.00G [01:50<00:09, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  92% 4.62G/5.00G [01:50<00:08, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.63G/5.00G [01:51<00:08, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.65G/5.00G [01:51<00:08, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.66G/5.00G [01:51<00:08, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  93% 4.67G/5.00G [01:51<00:07, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.68G/5.00G [01:52<00:07, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.69G/5.00G [01:52<00:07, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.70G/5.00G [01:52<00:07, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.71G/5.00G [01:52<00:06, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  94% 4.72G/5.00G [01:52<00:06, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.73G/5.00G [01:53<00:06, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.74G/5.00G [01:53<00:06, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.75G/5.00G [01:53<00:05, 42.5MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.76G/5.00G [01:54<00:07, 30.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  95% 4.77G/5.00G [01:54<00:06, 33.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.78G/5.00G [01:54<00:06, 36.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.79G/5.00G [01:55<00:05, 37.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.80G/5.00G [01:55<00:05, 39.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.81G/5.00G [01:55<00:04, 40.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  96% 4.82G/5.00G [01:55<00:04, 41.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  97% 4.83G/5.00G [01:56<00:03, 41.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  97% 4.84G/5.00G [01:56<00:03, 42.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  97% 4.85G/5.00G [01:56<00:03, 42.2MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  97% 4.87G/5.00G [01:56<00:03, 42.4MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.88G/5.00G [01:56<00:02, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.89G/5.00G [01:57<00:02, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.90G/5.00G [01:57<00:02, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.91G/5.00G [01:57<00:02, 42.7MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  98% 4.92G/5.00G [01:57<00:01, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.93G/5.00G [01:58<00:01, 42.6MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.94G/5.00G [01:58<00:01, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.95G/5.00G [01:58<00:01, 42.9MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.96G/5.00G [01:58<00:00, 43.0MB/s]\u001b[A\n","model-00002-of-00004.safetensors:  99% 4.97G/5.00G [01:59<00:00, 43.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors: 100% 4.98G/5.00G [01:59<00:00, 43.1MB/s]\u001b[A\n","model-00002-of-00004.safetensors: 100% 4.99G/5.00G [01:59<00:00, 42.8MB/s]\u001b[A\n","model-00002-of-00004.safetensors: 100% 5.00G/5.00G [01:59<00:00, 41.7MB/s]\n","Downloading shards:  50% 2/4 [03:59<04:00, 120.08s/it]\n","model-00003-of-00004.safetensors:   0% 0.00/4.92G [00:00<?, ?B/s]\u001b[A\n","model-00003-of-00004.safetensors:   0% 10.5M/4.92G [00:00<02:00, 40.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   0% 21.0M/4.92G [00:00<01:57, 41.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 31.5M/4.92G [00:00<01:55, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 41.9M/4.92G [00:00<01:54, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 52.4M/4.92G [00:01<01:54, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 62.9M/4.92G [00:01<01:53, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   1% 73.4M/4.92G [00:01<01:54, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   2% 83.9M/4.92G [00:01<01:52, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   2% 94.4M/4.92G [00:02<01:52, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   2% 105M/4.92G [00:02<01:53, 42.5MB/s] \u001b[A\n","model-00003-of-00004.safetensors:   2% 115M/4.92G [00:02<01:52, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 126M/4.92G [00:02<01:51, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 136M/4.92G [00:03<01:51, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 147M/4.92G [00:03<01:51, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 157M/4.92G [00:03<01:51, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   3% 168M/4.92G [00:03<01:51, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 178M/4.92G [00:04<01:50, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 189M/4.92G [00:04<01:49, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 199M/4.92G [00:04<01:49, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 210M/4.92G [00:04<01:49, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   4% 220M/4.92G [00:05<01:49, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 231M/4.92G [00:05<01:49, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 241M/4.92G [00:05<01:48, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 252M/4.92G [00:05<01:49, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   5% 262M/4.92G [00:06<01:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 273M/4.92G [00:06<01:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 283M/4.92G [00:06<01:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 294M/4.92G [00:06<01:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 304M/4.92G [00:07<01:48, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   6% 315M/4.92G [00:07<01:48, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 325M/4.92G [00:07<01:47, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 336M/4.92G [00:07<01:47, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 346M/4.92G [00:08<01:46, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 357M/4.92G [00:08<01:45, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   7% 367M/4.92G [00:08<01:46, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 377M/4.92G [00:08<01:46, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 388M/4.92G [00:09<01:45, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 398M/4.92G [00:09<01:45, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   8% 409M/4.92G [00:09<01:45, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 419M/4.92G [00:09<01:44, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 430M/4.92G [00:10<01:44, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 440M/4.92G [00:10<02:06, 35.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 451M/4.92G [00:10<01:59, 37.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:   9% 461M/4.92G [00:10<01:55, 38.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 472M/4.92G [00:11<01:51, 40.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 482M/4.92G [00:11<01:49, 40.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 493M/4.92G [00:11<01:47, 41.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 503M/4.92G [00:11<01:45, 41.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  10% 514M/4.92G [00:12<01:44, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 524M/4.92G [00:12<01:43, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 535M/4.92G [00:12<01:43, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 545M/4.92G [00:12<01:42, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  11% 556M/4.92G [00:13<01:42, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 566M/4.92G [00:13<01:41, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 577M/4.92G [00:13<01:41, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 587M/4.92G [00:13<01:41, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 598M/4.92G [00:14<01:40, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  12% 608M/4.92G [00:14<01:40, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 619M/4.92G [00:14<01:40, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 629M/4.92G [00:14<01:40, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 640M/4.92G [00:15<01:40, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 650M/4.92G [00:15<01:39, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  13% 661M/4.92G [00:15<01:39, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 671M/4.92G [00:15<01:39, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 682M/4.92G [00:16<01:40, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 692M/4.92G [00:16<01:39, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  14% 703M/4.92G [00:16<01:38, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 713M/4.92G [00:16<01:38, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 724M/4.92G [00:17<01:43, 40.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 734M/4.92G [00:17<01:36, 43.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 744M/4.92G [00:17<01:36, 43.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  15% 755M/4.92G [00:17<01:36, 43.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 765M/4.92G [00:18<01:36, 43.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 776M/4.92G [00:18<01:36, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 786M/4.92G [00:18<01:35, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 797M/4.92G [00:18<01:35, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  16% 807M/4.92G [00:19<01:35, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 818M/4.92G [00:19<01:35, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 828M/4.92G [00:19<01:34, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 839M/4.92G [00:19<01:34, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 849M/4.92G [00:20<01:34, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  17% 860M/4.92G [00:20<01:34, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 870M/4.92G [00:20<01:34, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 881M/4.92G [00:20<01:51, 36.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 891M/4.92G [00:21<01:55, 34.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  18% 902M/4.92G [00:21<01:48, 36.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 912M/4.92G [00:21<01:44, 38.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 923M/4.92G [00:21<01:40, 39.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 933M/4.92G [00:22<01:37, 40.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 944M/4.92G [00:22<01:36, 41.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  19% 954M/4.92G [00:22<01:34, 41.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 965M/4.92G [00:22<01:34, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 975M/4.92G [00:23<01:33, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 986M/4.92G [00:23<01:32, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 996M/4.92G [00:23<01:31, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  20% 1.01G/4.92G [00:23<01:31, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.02G/4.92G [00:24<01:30, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.03G/4.92G [00:24<01:30, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.04G/4.92G [00:24<01:30, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  21% 1.05G/4.92G [00:24<01:29, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.06G/4.92G [00:25<01:30, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.07G/4.92G [00:25<01:29, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.08G/4.92G [00:25<01:29, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.09G/4.92G [00:25<01:29, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  22% 1.10G/4.92G [00:26<01:29, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.11G/4.92G [00:26<01:29, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.12G/4.92G [00:26<01:28, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.13G/4.92G [00:26<01:28, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.14G/4.92G [00:27<01:28, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  23% 1.15G/4.92G [00:27<01:28, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.16G/4.92G [00:27<01:28, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.17G/4.92G [00:27<01:27, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.18G/4.92G [00:28<01:27, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  24% 1.20G/4.92G [00:28<01:27, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.21G/4.92G [00:28<01:26, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.22G/4.92G [00:28<01:27, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.23G/4.92G [00:29<01:26, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.24G/4.92G [00:29<01:26, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  25% 1.25G/4.92G [00:29<01:26, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.26G/4.92G [00:29<01:26, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.27G/4.92G [00:30<01:25, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.28G/4.92G [00:30<01:25, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.29G/4.92G [00:30<01:25, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  26% 1.30G/4.92G [00:30<01:24, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.31G/4.92G [00:31<01:24, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.32G/4.92G [00:31<01:35, 37.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.33G/4.92G [00:31<01:31, 39.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  27% 1.34G/4.92G [00:31<01:29, 40.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.35G/4.92G [00:32<01:27, 40.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.36G/4.92G [00:32<01:25, 41.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.37G/4.92G [00:32<01:24, 41.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.38G/4.92G [00:32<01:24, 41.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  28% 1.39G/4.92G [00:33<01:23, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.41G/4.92G [00:33<01:23, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.42G/4.92G [00:33<01:22, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.43G/4.92G [00:33<01:22, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.44G/4.92G [00:34<01:21, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  29% 1.45G/4.92G [00:34<01:21, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.46G/4.92G [00:34<01:21, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.47G/4.92G [00:34<01:20, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.48G/4.92G [00:35<01:20, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  30% 1.49G/4.92G [00:35<01:20, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.50G/4.92G [00:35<01:20, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.51G/4.92G [00:35<01:20, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.52G/4.92G [00:36<01:20, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.53G/4.92G [00:36<01:19, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  31% 1.54G/4.92G [00:36<01:19, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.55G/4.92G [00:36<01:19, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.56G/4.92G [00:37<01:18, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.57G/4.92G [00:37<01:18, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.58G/4.92G [00:37<01:17, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  32% 1.59G/4.92G [00:37<01:17, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.60G/4.92G [00:38<01:17, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.61G/4.92G [00:38<01:16, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.63G/4.92G [00:38<01:16, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.64G/4.92G [00:38<01:16, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  33% 1.65G/4.92G [00:39<01:16, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  34% 1.66G/4.92G [00:39<01:15, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  34% 1.67G/4.92G [00:39<01:15, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  34% 1.68G/4.92G [00:39<01:15, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  34% 1.69G/4.92G [00:40<01:14, 43.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.70G/4.92G [00:40<01:14, 43.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.71G/4.92G [00:40<01:14, 43.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.72G/4.92G [00:40<01:14, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.73G/4.92G [00:40<01:14, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  35% 1.74G/4.92G [00:41<01:14, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.75G/4.92G [00:41<01:14, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.76G/4.92G [00:41<01:21, 38.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.77G/4.92G [00:42<01:19, 39.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.78G/4.92G [00:42<01:16, 40.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  36% 1.79G/4.92G [00:42<01:16, 41.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  37% 1.80G/4.92G [00:42<01:14, 41.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  37% 1.81G/4.92G [00:43<01:13, 41.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  37% 1.82G/4.92G [00:43<01:13, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  37% 1.84G/4.92G [00:43<01:12, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.85G/4.92G [00:43<01:12, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.86G/4.92G [00:44<01:12, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.87G/4.92G [00:44<01:11, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.88G/4.92G [00:44<01:11, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  38% 1.89G/4.92G [00:44<01:11, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.90G/4.92G [00:45<01:10, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.91G/4.92G [00:45<01:10, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.92G/4.92G [00:45<01:09, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.93G/4.92G [00:45<01:09, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  39% 1.94G/4.92G [00:45<01:09, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.95G/4.92G [00:46<01:09, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.96G/4.92G [00:46<01:09, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.97G/4.92G [00:46<01:08, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  40% 1.98G/4.92G [00:46<01:08, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 1.99G/4.92G [00:47<01:08, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.00G/4.92G [00:47<01:08, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.01G/4.92G [00:47<01:07, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.02G/4.92G [00:47<01:07, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  41% 2.03G/4.92G [00:48<01:07, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.04G/4.92G [00:48<01:06, 43.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.06G/4.92G [00:48<01:06, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.07G/4.92G [00:48<01:06, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.08G/4.92G [00:49<01:06, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  42% 2.09G/4.92G [00:49<01:06, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.10G/4.92G [00:49<01:06, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.11G/4.92G [00:49<01:05, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.12G/4.92G [00:50<01:05, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  43% 2.13G/4.92G [00:50<01:05, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.14G/4.92G [00:50<01:05, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.15G/4.92G [00:50<01:04, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.16G/4.92G [00:51<01:04, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.17G/4.92G [00:51<01:04, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  44% 2.18G/4.92G [00:51<01:04, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.19G/4.92G [00:51<01:03, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.20G/4.92G [00:52<01:09, 39.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.21G/4.92G [00:52<01:09, 38.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.22G/4.92G [00:52<01:07, 40.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  45% 2.23G/4.92G [00:52<01:06, 40.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.24G/4.92G [00:53<01:04, 41.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.25G/4.92G [00:53<01:04, 41.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.26G/4.92G [00:53<01:03, 41.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.28G/4.92G [00:53<01:02, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  46% 2.29G/4.92G [00:54<01:02, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.30G/4.92G [00:54<01:01, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.31G/4.92G [00:54<01:01, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.32G/4.92G [00:54<01:00, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  47% 2.33G/4.92G [00:55<01:00, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.34G/4.92G [00:55<01:00, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.35G/4.92G [00:55<01:00, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.36G/4.92G [00:55<01:00, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.37G/4.92G [00:56<00:59, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  48% 2.38G/4.92G [00:56<00:59, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.39G/4.92G [00:56<00:59, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.40G/4.92G [00:56<00:59, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.41G/4.92G [00:57<00:58, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.42G/4.92G [00:57<00:58, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  49% 2.43G/4.92G [00:57<00:58, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.44G/4.92G [00:57<00:57, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.45G/4.92G [00:58<00:57, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.46G/4.92G [00:58<00:57, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  50% 2.47G/4.92G [00:58<00:57, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.49G/4.92G [00:58<00:57, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.50G/4.92G [00:59<00:56, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.51G/4.92G [00:59<00:56, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.52G/4.92G [00:59<00:56, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  51% 2.53G/4.92G [00:59<00:56, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.54G/4.92G [01:00<00:55, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.55G/4.92G [01:00<00:55, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.56G/4.92G [01:00<00:55, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.57G/4.92G [01:00<00:54, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  52% 2.58G/4.92G [01:01<00:54, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  53% 2.59G/4.92G [01:01<00:54, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  53% 2.60G/4.92G [01:01<00:54, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  53% 2.61G/4.92G [01:01<00:54, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  53% 2.62G/4.92G [01:02<00:53, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.63G/4.92G [01:02<00:53, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.64G/4.92G [01:02<00:58, 38.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.65G/4.92G [01:02<00:56, 39.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.66G/4.92G [01:03<00:55, 40.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  54% 2.67G/4.92G [01:03<00:54, 41.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.68G/4.92G [01:03<00:53, 41.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.69G/4.92G [01:03<00:52, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.71G/4.92G [01:04<00:52, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.72G/4.92G [01:04<00:51, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  55% 2.73G/4.92G [01:04<00:51, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.74G/4.92G [01:04<00:51, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.75G/4.92G [01:05<00:51, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.76G/4.92G [01:05<00:50, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  56% 2.77G/4.92G [01:05<00:50, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.78G/4.92G [01:05<00:50, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.79G/4.92G [01:06<00:50, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.80G/4.92G [01:06<00:49, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.81G/4.92G [01:06<00:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  57% 2.82G/4.92G [01:06<00:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.83G/4.92G [01:07<00:49, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.84G/4.92G [01:07<00:48, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.85G/4.92G [01:07<00:48, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.86G/4.92G [01:07<00:48, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  58% 2.87G/4.92G [01:08<00:48, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.88G/4.92G [01:08<00:47, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.89G/4.92G [01:08<00:47, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.90G/4.92G [01:08<00:47, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  59% 2.92G/4.92G [01:09<00:46, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.93G/4.92G [01:09<00:46, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.94G/4.92G [01:09<00:46, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.95G/4.92G [01:09<00:46, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.96G/4.92G [01:10<00:46, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  60% 2.97G/4.92G [01:10<00:45, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 2.98G/4.92G [01:10<00:45, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 2.99G/4.92G [01:10<00:45, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 3.00G/4.92G [01:11<00:44, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 3.01G/4.92G [01:11<00:44, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  61% 3.02G/4.92G [01:11<00:44, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.03G/4.92G [01:11<00:44, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.04G/4.92G [01:11<00:43, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.05G/4.92G [01:12<00:43, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.06G/4.92G [01:12<00:43, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  62% 3.07G/4.92G [01:12<00:42, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.08G/4.92G [01:13<00:50, 36.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.09G/4.92G [01:13<00:49, 36.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.10G/4.92G [01:13<00:47, 38.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  63% 3.11G/4.92G [01:13<00:45, 39.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.12G/4.92G [01:14<00:44, 40.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.14G/4.92G [01:14<00:43, 40.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.15G/4.92G [01:14<00:42, 41.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.16G/4.92G [01:14<00:42, 41.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  64% 3.17G/4.92G [01:15<00:41, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.18G/4.92G [01:15<00:41, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.19G/4.92G [01:15<00:40, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.20G/4.92G [01:15<00:40, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.21G/4.92G [01:16<00:40, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  65% 3.22G/4.92G [01:16<00:39, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  66% 3.23G/4.92G [01:16<00:39, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  66% 3.24G/4.92G [01:16<00:39, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  66% 3.25G/4.92G [01:17<00:39, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  66% 3.26G/4.92G [01:17<00:39, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.27G/4.92G [01:17<00:38, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.28G/4.92G [01:17<00:38, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.29G/4.92G [01:18<00:38, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.30G/4.92G [01:18<00:37, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  67% 3.31G/4.92G [01:18<00:37, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.32G/4.92G [01:18<00:37, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.33G/4.92G [01:19<00:36, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.34G/4.92G [01:19<00:36, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.36G/4.92G [01:19<00:36, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  68% 3.37G/4.92G [01:19<00:36, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.38G/4.92G [01:20<00:35, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.39G/4.92G [01:20<00:35, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.40G/4.92G [01:20<00:35, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  69% 3.41G/4.92G [01:20<00:35, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.42G/4.92G [01:21<00:35, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.43G/4.92G [01:21<00:35, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.44G/4.92G [01:21<00:34, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.45G/4.92G [01:21<00:34, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  70% 3.46G/4.92G [01:22<00:34, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.47G/4.92G [01:22<00:33, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.48G/4.92G [01:22<00:33, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.49G/4.92G [01:22<00:33, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.50G/4.92G [01:22<00:33, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  71% 3.51G/4.92G [01:23<00:32, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.52G/4.92G [01:23<00:32, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.53G/4.92G [01:23<00:32, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.54G/4.92G [01:23<00:32, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  72% 3.55G/4.92G [01:24<00:32, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.57G/4.92G [01:24<00:31, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.58G/4.92G [01:24<00:31, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.59G/4.92G [01:24<00:31, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.60G/4.92G [01:25<00:30, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  73% 3.61G/4.92G [01:25<00:38, 34.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.62G/4.92G [01:25<00:35, 36.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.63G/4.92G [01:26<00:34, 37.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.64G/4.92G [01:26<00:32, 39.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.65G/4.92G [01:26<00:31, 39.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  74% 3.66G/4.92G [01:26<00:30, 40.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.67G/4.92G [01:27<00:30, 41.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.68G/4.92G [01:27<00:29, 41.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.69G/4.92G [01:27<00:29, 41.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  75% 3.70G/4.92G [01:27<00:28, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.71G/4.92G [01:28<00:28, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.72G/4.92G [01:28<00:28, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.73G/4.92G [01:28<00:28, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.74G/4.92G [01:28<00:27, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  76% 3.75G/4.92G [01:29<00:27, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.76G/4.92G [01:29<00:27, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.77G/4.92G [01:29<00:26, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.79G/4.92G [01:29<00:26, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.80G/4.92G [01:30<00:26, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  77% 3.81G/4.92G [01:30<00:26, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.82G/4.92G [01:30<00:26, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.83G/4.92G [01:30<00:25, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.84G/4.92G [01:31<00:25, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.85G/4.92G [01:31<00:25, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  78% 3.86G/4.92G [01:31<00:25, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.87G/4.92G [01:31<00:24, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.88G/4.92G [01:32<00:24, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.89G/4.92G [01:32<00:24, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  79% 3.90G/4.92G [01:32<00:23, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.91G/4.92G [01:32<00:23, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.92G/4.92G [01:33<00:23, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.93G/4.92G [01:33<00:23, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.94G/4.92G [01:33<00:22, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  80% 3.95G/4.92G [01:33<00:22, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 3.96G/4.92G [01:34<00:22, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 3.97G/4.92G [01:34<00:21, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 3.98G/4.92G [01:34<00:21, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 4.00G/4.92G [01:34<00:21, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  81% 4.01G/4.92G [01:35<00:21, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.02G/4.92G [01:35<00:20, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.03G/4.92G [01:35<00:20, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.04G/4.92G [01:35<00:20, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  82% 4.05G/4.92G [01:36<00:24, 35.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.06G/4.92G [01:36<00:25, 33.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.07G/4.92G [01:36<00:23, 35.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.08G/4.92G [01:37<00:22, 37.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.09G/4.92G [01:37<00:21, 38.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  83% 4.10G/4.92G [01:37<00:20, 39.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.11G/4.92G [01:37<00:19, 40.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.12G/4.92G [01:38<00:19, 41.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.13G/4.92G [01:38<00:18, 41.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.14G/4.92G [01:38<00:18, 41.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  84% 4.15G/4.92G [01:38<00:18, 42.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.16G/4.92G [01:39<00:17, 42.1MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.17G/4.92G [01:39<00:17, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.18G/4.92G [01:39<00:17, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  85% 4.19G/4.92G [01:39<00:16, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.20G/4.92G [01:40<00:16, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.22G/4.92G [01:40<00:16, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.23G/4.92G [01:40<00:16, 42.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.24G/4.92G [01:40<00:15, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  86% 4.25G/4.92G [01:40<00:15, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.26G/4.92G [01:41<00:15, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.27G/4.92G [01:41<00:15, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.28G/4.92G [01:41<00:14, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.29G/4.92G [01:41<00:14, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  87% 4.30G/4.92G [01:42<00:14, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.31G/4.92G [01:42<00:14, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.32G/4.92G [01:42<00:14, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.33G/4.92G [01:42<00:13, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  88% 4.34G/4.92G [01:43<00:13, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.35G/4.92G [01:43<00:13, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.36G/4.92G [01:43<00:13, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.37G/4.92G [01:43<00:12, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.38G/4.92G [01:44<00:12, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  89% 4.39G/4.92G [01:44<00:12, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.40G/4.92G [01:44<00:12, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.41G/4.92G [01:44<00:11, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.42G/4.92G [01:45<00:13, 37.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.44G/4.92G [01:45<00:10, 44.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  90% 4.45G/4.92G [01:45<00:10, 43.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.46G/4.92G [01:45<00:10, 43.2MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.47G/4.92G [01:46<00:10, 43.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.48G/4.92G [01:46<00:11, 38.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  91% 4.49G/4.92G [01:47<00:14, 30.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.50G/4.92G [01:47<00:12, 33.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.51G/4.92G [01:47<00:11, 35.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.52G/4.92G [01:47<00:10, 37.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.53G/4.92G [01:48<00:09, 38.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  92% 4.54G/4.92G [01:48<00:09, 39.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.55G/4.92G [01:48<00:09, 40.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.56G/4.92G [01:48<00:08, 40.9MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.57G/4.92G [01:49<00:08, 41.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.58G/4.92G [01:49<00:08, 41.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  93% 4.59G/4.92G [01:49<00:07, 41.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.60G/4.92G [01:49<00:07, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.61G/4.92G [01:50<00:07, 42.0MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.62G/4.92G [01:50<00:06, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.63G/4.92G [01:50<00:06, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  94% 4.65G/4.92G [01:50<00:06, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.66G/4.92G [01:50<00:06, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.67G/4.92G [01:51<00:05, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.68G/4.92G [01:51<00:05, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  95% 4.69G/4.92G [01:51<00:05, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.70G/4.92G [01:51<00:05, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.71G/4.92G [01:52<00:04, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.72G/4.92G [01:52<00:04, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.73G/4.92G [01:52<00:04, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  96% 4.74G/4.92G [01:52<00:04, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.75G/4.92G [01:53<00:03, 42.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.76G/4.92G [01:53<00:03, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.77G/4.92G [01:53<00:03, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.78G/4.92G [01:53<00:03, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  97% 4.79G/4.92G [01:54<00:02, 42.3MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  98% 4.80G/4.92G [01:54<00:02, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  98% 4.81G/4.92G [01:54<00:02, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  98% 4.82G/4.92G [01:54<00:02, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  98% 4.83G/4.92G [01:55<00:01, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.84G/4.92G [01:55<00:01, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.85G/4.92G [01:55<00:01, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.87G/4.92G [01:55<00:01, 42.6MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.88G/4.92G [01:56<00:00, 42.7MB/s]\u001b[A\n","model-00003-of-00004.safetensors:  99% 4.89G/4.92G [01:56<00:00, 42.8MB/s]\u001b[A\n","model-00003-of-00004.safetensors: 100% 4.90G/4.92G [01:56<00:00, 42.4MB/s]\u001b[A\n","model-00003-of-00004.safetensors: 100% 4.91G/4.92G [01:57<00:00, 34.5MB/s]\u001b[A\n","model-00003-of-00004.safetensors: 100% 4.92G/4.92G [01:57<00:00, 41.9MB/s]\n","Downloading shards:  75% 3/4 [05:57<01:58, 118.94s/it]\n","model-00004-of-00004.safetensors:   0% 0.00/1.17G [00:00<?, ?B/s]\u001b[A\n","model-00004-of-00004.safetensors:   1% 10.5M/1.17G [00:00<00:28, 41.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   2% 21.0M/1.17G [00:00<00:27, 42.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   3% 31.5M/1.17G [00:00<00:26, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   4% 41.9M/1.17G [00:00<00:26, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   4% 52.4M/1.17G [00:01<00:26, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   5% 62.9M/1.17G [00:01<00:25, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   6% 73.4M/1.17G [00:01<00:25, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   7% 83.9M/1.17G [00:01<00:25, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   8% 94.4M/1.17G [00:02<00:25, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:   9% 105M/1.17G [00:02<00:24, 42.5MB/s] \u001b[A\n","model-00004-of-00004.safetensors:  10% 115M/1.17G [00:02<00:24, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  11% 126M/1.17G [00:02<00:24, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  12% 136M/1.17G [00:03<00:24, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  13% 147M/1.17G [00:03<00:23, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  13% 157M/1.17G [00:03<00:23, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  14% 168M/1.17G [00:03<00:23, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  15% 178M/1.17G [00:04<00:23, 42.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  16% 189M/1.17G [00:04<00:23, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  17% 199M/1.17G [00:04<00:22, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  18% 210M/1.17G [00:04<00:22, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  19% 220M/1.17G [00:05<00:22, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  20% 231M/1.17G [00:05<00:22, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  21% 241M/1.17G [00:05<00:21, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  22% 252M/1.17G [00:05<00:21, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  22% 262M/1.17G [00:06<00:21, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  23% 273M/1.17G [00:06<00:21, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  24% 283M/1.17G [00:06<00:20, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  25% 294M/1.17G [00:06<00:20, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  26% 304M/1.17G [00:07<00:20, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  27% 315M/1.17G [00:07<00:20, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  28% 325M/1.17G [00:07<00:19, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  29% 336M/1.17G [00:07<00:19, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  30% 346M/1.17G [00:08<00:19, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  31% 357M/1.17G [00:08<00:19, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  31% 367M/1.17G [00:08<00:18, 42.9MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  32% 377M/1.17G [00:08<00:18, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  33% 388M/1.17G [00:09<00:18, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  34% 398M/1.17G [00:09<00:18, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  35% 409M/1.17G [00:09<00:17, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  36% 419M/1.17G [00:09<00:17, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  37% 430M/1.17G [00:10<00:17, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  38% 440M/1.17G [00:10<00:17, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  39% 451M/1.17G [00:10<00:16, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  39% 461M/1.17G [00:10<00:16, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  40% 472M/1.17G [00:11<00:16, 42.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  41% 482M/1.17G [00:11<00:16, 42.1MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  42% 493M/1.17G [00:11<00:16, 42.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  43% 503M/1.17G [00:11<00:15, 42.1MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  44% 514M/1.17G [00:12<00:15, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  45% 524M/1.17G [00:12<00:15, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  46% 535M/1.17G [00:12<00:14, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  47% 545M/1.17G [00:12<00:14, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  48% 556M/1.17G [00:13<00:14, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  48% 566M/1.17G [00:13<00:14, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  49% 577M/1.17G [00:13<00:13, 42.9MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  50% 587M/1.17G [00:13<00:13, 43.0MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  51% 598M/1.17G [00:14<00:13, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  52% 608M/1.17G [00:14<00:13, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  53% 619M/1.17G [00:14<00:12, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  54% 629M/1.17G [00:14<00:12, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  55% 640M/1.17G [00:15<00:12, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  56% 650M/1.17G [00:15<00:12, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  57% 661M/1.17G [00:15<00:11, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  57% 671M/1.17G [00:15<00:11, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  58% 682M/1.17G [00:16<00:11, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  59% 692M/1.17G [00:16<00:11, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  60% 703M/1.17G [00:16<00:10, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  61% 713M/1.17G [00:16<00:10, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  62% 724M/1.17G [00:17<00:10, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  63% 734M/1.17G [00:17<00:10, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  64% 744M/1.17G [00:17<00:09, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  65% 755M/1.17G [00:17<00:09, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  66% 765M/1.17G [00:18<00:09, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  66% 776M/1.17G [00:18<00:09, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  67% 786M/1.17G [00:18<00:08, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  68% 797M/1.17G [00:18<00:08, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  69% 807M/1.17G [00:18<00:08, 42.8MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  70% 818M/1.17G [00:19<00:08, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  71% 828M/1.17G [00:19<00:07, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  72% 839M/1.17G [00:19<00:07, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  73% 849M/1.17G [00:19<00:07, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  74% 860M/1.17G [00:20<00:07, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  75% 870M/1.17G [00:20<00:07, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  75% 881M/1.17G [00:20<00:06, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  76% 891M/1.17G [00:20<00:06, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  77% 902M/1.17G [00:21<00:06, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  78% 912M/1.17G [00:21<00:06, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  79% 923M/1.17G [00:21<00:05, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  80% 933M/1.17G [00:21<00:05, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  81% 944M/1.17G [00:22<00:05, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  82% 954M/1.17G [00:22<00:05, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  83% 965M/1.17G [00:22<00:04, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  83% 975M/1.17G [00:22<00:04, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  84% 986M/1.17G [00:23<00:04, 41.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  85% 996M/1.17G [00:23<00:04, 42.1MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  86% 1.01G/1.17G [00:23<00:03, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  87% 1.02G/1.17G [00:23<00:03, 42.2MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  88% 1.03G/1.17G [00:24<00:03, 42.3MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  89% 1.04G/1.17G [00:24<00:03, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  90% 1.05G/1.17G [00:24<00:02, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  91% 1.06G/1.17G [00:24<00:02, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  92% 1.07G/1.17G [00:25<00:02, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  92% 1.08G/1.17G [00:25<00:02, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  93% 1.09G/1.17G [00:25<00:01, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  94% 1.10G/1.17G [00:25<00:01, 42.6MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  95% 1.11G/1.17G [00:26<00:01, 42.7MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  96% 1.12G/1.17G [00:26<00:01, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  97% 1.13G/1.17G [00:26<00:00, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  98% 1.14G/1.17G [00:26<00:00, 42.4MB/s]\u001b[A\n","model-00004-of-00004.safetensors:  99% 1.15G/1.17G [00:27<00:00, 42.5MB/s]\u001b[A\n","model-00004-of-00004.safetensors: 100% 1.17G/1.17G [00:27<00:00, 42.5MB/s]\n","Downloading shards: 100% 4/4 [06:25<00:00, 96.36s/it]\n","Loading checkpoint shards: 100% 4/4 [00:05<00:00,  1.26s/it]\n","generation_config.json: 100% 177/177 [00:00<00:00, 1.03MB/s]\n","tokenizer_config.json: 100% 50.6k/50.6k [00:00<00:00, 239kB/s]\n","tokenizer.json: 100% 9.09M/9.09M [00:01<00:00, 6.09MB/s]\n","special_tokens_map.json: 100% 73.0/73.0 [00:00<00:00, 562kB/s]\n","Map: 100% 627/627 [00:02<00:00, 212.76 examples/s]\n","Map: 100% 106/106 [00:00<00:00, 197.89 examples/s]\n","Filter: 100% 627/627 [00:00<00:00, 631.85 examples/s]\n","Filter: 100% 106/106 [00:00<00:00, 615.55 examples/s]\n","/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n","  warnings.warn(\n","[2024-11-05 20:30:46,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n","[2024-11-05 20:30:48,176] [INFO] [comm.py:652:init_distributed] cdb=None\n","[2024-11-05 20:30:48,177] [INFO] [comm.py:683:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n","Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n","Creating extension directory /root/.cache/torch_extensions/py310_cu121/fused_adam...\n","Detected CUDA files, patching ldflags\n","Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/fused_adam/build.ninja...\n","/usr/local/lib/python3.10/dist-packages/torch/utils/cpp_extension.py:1964: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. \n","If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].\n","  warnings.warn(\n","Building extension module fused_adam...\n","Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n","[1/3] /usr/local/cuda/bin/nvcc --generate-dependencies-with-compile --dependency-output multi_tensor_adam.cuda.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -U__CUDA_NO_BFLOAT16_CONVERSIONS__ -std=c++17 -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n","[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/includes -I/usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam -isystem /usr/local/lib/python3.10/dist-packages/torch/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/torch/csrc/api/include -isystem /usr/local/lib/python3.10/dist-packages/torch/include/TH -isystem /usr/local/lib/python3.10/dist-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /usr/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /usr/local/lib/python3.10/dist-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o \n","[3/3] c++ fused_adam_frontend.o multi_tensor_adam.cuda.o -shared -L/usr/local/lib/python3.10/dist-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o fused_adam.so\n","Loading extension module fused_adam...\n","Time to load fused_adam op: 34.75411319732666 seconds\n","[2024-11-05 20:31:36,193] [WARNING] [lr_schedules.py:683:get_lr] Attempting to get learning rate from scheduler before it has started\n","\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.18.5\n","\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinForecaster/Benchmark with Llama3 8b Data/wandb/run-20241105_203136-1n9fl409\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n","\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mllama3-8b-a100-5e-5lr\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/yl5440-columbia-university/Benchmark%20with%20Llama-3-8B\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/yl5440-columbia-university/Benchmark%20with%20Llama-3-8B/runs/1n9fl409\u001b[0m\n","  0% 0/195 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 11.1754, 'grad_norm': 0.0, 'learning_rate': 0, 'epoch': 0.25}\n","{'loss': 11.2416, 'grad_norm': 12.366005897521973, 'learning_rate': 1.9342640361727076e-05, 'epoch': 0.51}\n"," 10% 20/195 [09:56<1:25:25, 29.29s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:01<00:33,  1.52it/s]\u001b[A\n","  6% 3/53 [00:02<00:43,  1.16it/s]\u001b[A\n","  8% 4/53 [00:02<00:34,  1.44it/s]\u001b[A\n","  9% 5/53 [00:03<00:39,  1.21it/s]\u001b[A\n"," 11% 6/53 [00:05<00:42,  1.10it/s]\u001b[A\n"," 13% 7/53 [00:06<00:44,  1.03it/s]\u001b[A\n"," 15% 8/53 [00:07<00:45,  1.02s/it]\u001b[A\n"," 17% 9/53 [00:08<00:45,  1.02s/it]\u001b[A\n"," 19% 10/53 [00:08<00:35,  1.23it/s]\u001b[A\n"," 21% 11/53 [00:09<00:40,  1.05it/s]\u001b[A\n"," 23% 12/53 [00:10<00:40,  1.01it/s]\u001b[A\n"," 25% 13/53 [00:12<00:41,  1.04s/it]\u001b[A\n"," 26% 14/53 [00:13<00:41,  1.07s/it]\u001b[A\n"," 28% 15/53 [00:14<00:42,  1.11s/it]\u001b[A\n"," 30% 16/53 [00:14<00:32,  1.13it/s]\u001b[A\n"," 32% 17/53 [00:15<00:34,  1.04it/s]\u001b[A\n"," 34% 18/53 [00:16<00:27,  1.26it/s]\u001b[A\n"," 36% 19/53 [00:17<00:32,  1.06it/s]\u001b[A\n"," 38% 20/53 [00:18<00:33,  1.02s/it]\u001b[A\n"," 40% 21/53 [00:20<00:35,  1.10s/it]\u001b[A\n"," 42% 22/53 [00:21<00:36,  1.17s/it]\u001b[A\n"," 43% 23/53 [00:22<00:34,  1.14s/it]\u001b[A\n"," 45% 24/53 [00:23<00:32,  1.11s/it]\u001b[A\n"," 47% 25/53 [00:24<00:30,  1.08s/it]\u001b[A\n"," 49% 26/53 [00:25<00:28,  1.07s/it]\u001b[A\n"," 51% 27/53 [00:26<00:22,  1.15it/s]\u001b[A\n"," 53% 28/53 [00:27<00:23,  1.05it/s]\u001b[A\n"," 55% 29/53 [00:28<00:25,  1.05s/it]\u001b[A\n"," 57% 30/53 [00:29<00:25,  1.09s/it]\u001b[A\n"," 58% 31/53 [00:30<00:24,  1.09s/it]\u001b[A\n"," 60% 32/53 [00:31<00:23,  1.12s/it]\u001b[A\n"," 62% 33/53 [00:33<00:23,  1.16s/it]\u001b[A\n"," 64% 34/53 [00:34<00:22,  1.18s/it]\u001b[A\n"," 66% 35/53 [00:35<00:20,  1.16s/it]\u001b[A\n"," 68% 36/53 [00:36<00:18,  1.11s/it]\u001b[A\n"," 70% 37/53 [00:37<00:17,  1.08s/it]\u001b[A\n"," 72% 38/53 [00:38<00:16,  1.11s/it]\u001b[A\n"," 74% 39/53 [00:39<00:15,  1.10s/it]\u001b[A\n"," 75% 40/53 [00:40<00:11,  1.14it/s]\u001b[A\n"," 77% 41/53 [00:41<00:11,  1.09it/s]\u001b[A\n"," 79% 42/53 [00:42<00:10,  1.01it/s]\u001b[A\n"," 81% 43/53 [00:43<00:10,  1.04s/it]\u001b[A\n"," 83% 44/53 [00:44<00:09,  1.03s/it]\u001b[A\n"," 85% 45/53 [00:44<00:06,  1.24it/s]\u001b[A\n"," 87% 46/53 [00:45<00:06,  1.09it/s]\u001b[A\n"," 89% 47/53 [00:46<00:04,  1.33it/s]\u001b[A\n"," 91% 48/53 [00:46<00:03,  1.58it/s]\u001b[A\n"," 92% 49/53 [00:47<00:03,  1.31it/s]\u001b[A\n"," 94% 50/53 [00:48<00:02,  1.18it/s]\u001b[A\n"," 96% 51/53 [00:49<00:01,  1.10it/s]\u001b[A\n"," 98% 52/53 [00:50<00:00,  1.02it/s]\u001b[A\n","100% 53/53 [00:52<00:00,  1.10s/it]\u001b[A\n","{'eval_loss': 11.090203285217285, 'eval_runtime': 54.0892, 'eval_samples_per_second': 1.96, 'eval_steps_per_second': 0.98, 'epoch': 0.51}\n","\n"," 10% 20/195 [10:50<1:25:25, 29.29s/it]\n","{'loss': 4.3551, 'grad_norm': 1.6789228916168213, 'learning_rate': 5e-05, 'epoch': 0.76}\n","{'loss': 0.772, 'grad_norm': 1.038008451461792, 'learning_rate': 5e-05, 'epoch': 1.02}\n"," 21% 40/195 [19:48<53:24, 20.67s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.20it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.77it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.77it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.72it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.60it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.65it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.70it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.84it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.91it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.84it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.77it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.38it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.31it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.23it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:08<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.74it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.54it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.37it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:12<00:07,  2.40it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.79it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.82it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.81it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.91it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.84it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.11it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.16it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.09it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.98it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.92it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.86it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.85it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.82it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.28it/s]\u001b[A\n","{'eval_loss': 0.5213888883590698, 'eval_runtime': 20.7331, 'eval_samples_per_second': 5.113, 'eval_steps_per_second': 2.556, 'epoch': 1.02}\n","\n"," 21% 40/195 [20:09<53:24, 20.67s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ATruncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n","Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:00<00:28,  1.74it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:00<00:15,  3.01it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [00:00<00:11,  4.11it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [00:01<00:09,  5.08it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [00:01<00:09,  4.74it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [00:01<00:07,  5.59it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [00:01<00:07,  6.05it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [00:01<00:06,  6.75it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [00:01<00:04,  8.00it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [00:01<00:05,  7.68it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [00:02<00:04,  8.14it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [00:02<00:04,  7.41it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [00:02<00:04,  7.69it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [00:02<00:05,  6.30it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [00:02<00:05,  6.69it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [00:02<00:04,  7.39it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [00:02<00:04,  7.18it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [00:03<00:04,  7.20it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [00:03<00:04,  7.14it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [00:03<00:03,  7.50it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [00:03<00:03,  6.91it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [00:03<00:03,  7.37it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [00:03<00:03,  7.92it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [00:03<00:03,  7.97it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [00:04<00:02,  7.73it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [00:04<00:02,  7.58it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [00:04<00:03,  6.41it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [00:04<00:03,  6.62it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [00:04<00:03,  6.28it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [00:04<00:02,  6.88it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [00:05<00:02,  7.38it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [00:05<00:02,  7.34it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [00:05<00:02,  7.46it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [00:05<00:01,  7.65it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [00:05<00:01,  7.96it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [00:05<00:01,  7.90it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [00:05<00:01,  7.88it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [00:05<00:01,  7.66it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [00:06<00:01,  7.74it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [00:06<00:01,  6.76it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [00:06<00:00,  7.18it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [00:06<00:00,  7.02it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [00:06<00:00,  6.17it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [00:06<00:00,  6.14it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [00:07<00:00,  6.11it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [00:07<00:00,  6.57it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [00:07<00:00,  6.66it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [00:07<00:00,  6.73it/s]\n","\n","{'loss': 0.4499, 'grad_norm': 0.2876254618167877, 'learning_rate': 5e-05, 'epoch': 1.27}\n","{'loss': 0.3768, 'grad_norm': 0.16913160681724548, 'learning_rate': 5e-05, 'epoch': 1.53}\n"," 31% 60/195 [25:52<34:25, 15.30s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.21it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.76it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.77it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.72it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.60it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.70it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.83it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.91it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.84it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.77it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.31it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.23it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.74it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.54it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.37it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:13<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.82it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.81it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.91it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.97it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.06it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.07it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.86it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.84it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.82it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.22it/s]\u001b[A\n","{'eval_loss': 0.3662136495113373, 'eval_runtime': 20.8419, 'eval_samples_per_second': 5.086, 'eval_steps_per_second': 2.543, 'epoch': 1.53}\n","\n"," 31% 60/195 [26:13<34:25, 15.30s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:32<26:09, 32.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:58<22:54, 28.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:09<16:16, 20.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:11<10:09, 13.25s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [01:32<12:05, 16.12s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [01:40<09:41, 13.22s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [01:55<09:54, 13.83s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [02:01<08:01, 11.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [02:18<08:54, 13.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [02:43<11:12, 16.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [03:13<13:38, 20.98s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [03:27<11:51, 18.72s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [03:42<10:52, 17.65s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [03:55<09:42, 16.17s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [04:17<10:29, 17.98s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [04:45<11:55, 21.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [04:53<09:18, 16.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [05:15<09:57, 18.66s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [05:46<11:30, 22.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [05:51<08:33, 17.12s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [06:20<10:02, 20.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [06:38<09:13, 19.75s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [06:52<08:05, 17.99s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [07:18<08:51, 20.45s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [07:29<07:23, 17.75s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [08:06<09:23, 23.49s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [08:31<09:11, 23.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [09:01<09:27, 25.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [09:30<09:20, 26.70s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [09:49<08:04, 24.24s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [10:08<07:13, 22.84s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [10:13<05:13, 17.40s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [10:44<06:04, 21.42s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [11:05<05:41, 21.37s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [11:28<05:27, 21.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [11:44<04:41, 20.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [11:59<04:03, 18.71s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [12:21<03:54, 19.55s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [13:09<05:11, 28.28s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [13:29<04:15, 25.55s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [13:48<03:32, 23.59s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [14:09<03:03, 22.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [14:22<02:19, 20.00s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [14:47<02:07, 21.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [15:18<02:01, 24.34s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [15:30<01:22, 20.52s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [15:43<00:55, 18.47s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [15:53<00:31, 15.68s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [16:13<00:17, 17.25s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [16:40<00:00, 20.00s/it]\n","\n","Binary Accuracy: 0.42  |  Mean Square Error: 14.54\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.41606106651410346, 'rouge2': 0.12795040444610467, 'rougeL': 0.234441544928749}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.3769949353793691, 'rouge2': 0.10293349907151338, 'rougeL': 0.22161576538153285}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.4351405152338609, 'rouge2': 0.12262939084559941, 'rougeL': 0.22452109956144076}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.3529, 'grad_norm': 0.20882779359817505, 'learning_rate': 5e-05, 'epoch': 1.78}\n","{'loss': 0.3494, 'grad_norm': 0.20099438726902008, 'learning_rate': 5e-05, 'epoch': 2.04}\n"," 41% 80/195 [48:05<27:32, 14.37s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.23it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.76it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.77it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.71it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.60it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.70it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.84it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.91it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.84it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.77it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.31it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.23it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.74it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.54it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.37it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:12<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.81it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.91it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.96it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.06it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.07it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.86it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.84it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.23it/s]\u001b[A\n","{'eval_loss': 0.3417072594165802, 'eval_runtime': 20.8833, 'eval_samples_per_second': 5.076, 'eval_steps_per_second': 2.538, 'epoch': 2.04}\n","\n"," 41% 80/195 [48:26<27:32, 14.37s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:25<20:41, 25.34s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [01:03<26:14, 32.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:16<18:32, 23.67s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:47<20:36, 26.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [02:16<20:32, 27.38s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:37<18:35, 25.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [03:12<20:21, 28.40s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:33<18:14, 26.07s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:44<14:42, 21.54s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [04:13<15:51, 23.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:44<16:52, 25.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [05:16<17:39, 27.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:46<17:24, 28.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:49<12:31, 20.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [06:20<13:57, 23.93s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:48<14:08, 24.95s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [07:09<13:10, 23.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:46<14:46, 27.71s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [08:11<13:55, 26.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [08:37<13:17, 26.60s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [09:22<15:29, 32.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [09:44<13:34, 29.09s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [10:18<13:49, 30.74s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [10:29<10:45, 24.83s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [10:56<10:37, 25.51s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [11:29<11:05, 27.72s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [12:05<11:34, 30.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [12:41<11:37, 31.72s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [13:10<10:54, 31.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [13:24<08:36, 25.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [13:39<07:08, 22.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [13:48<05:32, 18.48s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [14:10<05:32, 19.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [14:44<06:23, 23.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [15:13<06:21, 25.44s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [15:46<06:29, 27.80s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [16:23<06:36, 30.51s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [16:52<06:02, 30.18s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [17:16<05:10, 28.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [17:51<05:01, 30.17s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [18:15<04:14, 28.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [18:32<03:20, 25.10s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [18:44<02:27, 21.07s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [19:11<02:17, 22.84s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [19:37<01:58, 23.69s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [20:08<01:44, 26.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [20:36<01:19, 26.42s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [21:02<00:52, 26.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [21:39<00:29, 29.53s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [22:03<00:00, 26.47s/it]\n","\n","Binary Accuracy: 0.65  |  Mean Square Error: 15.68\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.4716999843582193, 'rouge2': 0.1650946687953232, 'rougeL': 0.28730460403807767}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.42960717838069745, 'rouge2': 0.13165073566919555, 'rougeL': 0.26045548775772576}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.4514846586535601, 'rouge2': 0.13489349664547226, 'rougeL': 0.2199553706387686}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.3311, 'grad_norm': 0.17615191638469696, 'learning_rate': 5e-05, 'epoch': 2.29}\n","{'loss': 0.3238, 'grad_norm': 0.16709819436073303, 'learning_rate': 5e-05, 'epoch': 2.55}\n"," 51% 100/195 [1:15:22<21:30, 13.58s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:11,  4.51it/s]\u001b[A\n","  6% 3/53 [00:00<00:15,  3.15it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.81it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.80it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.73it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.63it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.60it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.73it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.76it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.65it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.70it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.84it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.90it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.82it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.84it/s]\u001b[A\n"," 32% 17/53 [00:05<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.76it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.31it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.23it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:08<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.74it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.54it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.37it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:12<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.41it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:13<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.82it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.91it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.96it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.05it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.07it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.86it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.84it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.20it/s]\u001b[A\n","{'eval_loss': 0.3291509449481964, 'eval_runtime': 20.9401, 'eval_samples_per_second': 5.062, 'eval_steps_per_second': 2.531, 'epoch': 2.55}\n","\n"," 51% 100/195 [1:15:43<21:30, 13.58s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:23<19:29, 23.87s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:58<24:09, 30.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:11<17:27, 22.29s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:33<17:11, 22.42s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [02:04<18:53, 25.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:22<16:47, 22.90s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [02:50<17:39, 24.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:10<16:03, 22.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:22<13:29, 19.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [03:44<13:33, 20.35s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:11<14:36, 22.48s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [04:36<14:35, 23.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:04<15:15, 24.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:27<14:24, 24.02s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [05:54<14:32, 24.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:18<14:02, 24.77s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [06:41<13:23, 24.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:06<13:05, 24.54s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [07:31<12:39, 24.51s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [07:54<12:07, 24.24s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [08:25<12:34, 26.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [08:47<11:36, 24.89s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [09:10<10:57, 24.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [09:30<10:00, 23.08s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [09:52<09:26, 22.65s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [10:24<10:11, 25.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [10:43<09:04, 23.67s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [11:06<08:34, 23.38s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [11:54<10:47, 30.85s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [12:13<09:01, 27.08s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [12:31<07:46, 24.58s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [12:56<07:23, 24.67s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [13:14<06:26, 22.76s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [13:44<06:35, 24.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [14:08<06:09, 24.61s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [14:29<05:30, 23.58s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [14:50<04:56, 22.83s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [15:21<05:01, 25.10s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [15:44<04:29, 24.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [16:04<03:52, 23.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [16:29<03:32, 23.61s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [17:01<03:28, 26.11s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [17:22<02:53, 24.80s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [17:48<02:29, 24.93s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [18:14<02:06, 25.34s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [18:41<01:43, 25.87s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [19:02<01:13, 24.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [19:22<00:45, 22.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [19:48<00:23, 23.98s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [20:10<00:00, 24.22s/it]\n","\n","Binary Accuracy: 0.50  |  Mean Square Error: 22.00\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.4774418186967879, 'rouge2': 0.17861635482739915, 'rougeL': 0.2917573406860742}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.43357233220273056, 'rouge2': 0.14673480307878184, 'rougeL': 0.2708503390258403}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.45914946661604256, 'rouge2': 0.1358080888044505, 'rougeL': 0.23244567384363002}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.3102, 'grad_norm': 0.19316884875297546, 'learning_rate': 5e-05, 'epoch': 2.8}\n","{'loss': 0.3134, 'grad_norm': 0.17230817675590515, 'learning_rate': 5e-05, 'epoch': 3.06}\n"," 62% 120/195 [1:40:37<16:31, 13.22s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.22it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.77it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.77it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.71it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.60it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.69it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.83it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.90it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.82it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.84it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.76it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.31it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.23it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.74it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.54it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.37it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:13<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.81it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.91it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.97it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.06it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.07it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.86it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.84it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.23it/s]\u001b[A\n","{'eval_loss': 0.32264962792396545, 'eval_runtime': 20.9617, 'eval_samples_per_second': 5.057, 'eval_steps_per_second': 2.528, 'epoch': 3.06}\n","\n"," 62% 120/195 [1:40:58<16:31, 13.22s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:21<17:23, 21.29s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:50<20:38, 25.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:15<19:55, 25.44s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:29<16:02, 20.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [01:59<18:18, 24.42s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:19<16:35, 22.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [02:47<17:41, 24.69s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:10<16:52, 24.10s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:32<15:52, 23.24s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [03:56<15:41, 23.53s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:21<15:41, 24.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [04:45<15:05, 23.83s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:05<14:07, 22.91s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:40<15:53, 26.50s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [06:16<17:02, 29.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:45<16:31, 29.17s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [07:05<14:33, 26.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:32<14:13, 26.66s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [07:59<13:50, 26.80s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [08:26<13:20, 26.70s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [09:00<14:03, 29.08s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [09:21<12:28, 26.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [09:52<12:30, 27.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [10:17<11:45, 27.12s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [10:37<10:25, 25.01s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [11:06<10:25, 26.06s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [11:37<10:34, 27.58s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [12:03<09:58, 27.21s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [12:39<10:25, 29.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [13:07<09:42, 29.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [13:28<08:28, 26.75s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [13:50<07:34, 25.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [14:09<06:41, 23.62s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [14:34<06:22, 23.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [14:56<05:47, 23.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [15:27<05:58, 25.59s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [15:52<05:30, 25.42s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [16:20<05:14, 26.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [16:50<05:01, 27.43s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [17:12<04:18, 25.87s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [17:34<03:41, 24.58s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [18:06<03:34, 26.78s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [18:26<02:52, 24.71s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [18:49<02:26, 24.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [19:24<02:17, 27.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [19:51<01:49, 27.44s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [20:11<01:15, 25.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [20:35<00:49, 24.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [21:00<00:24, 24.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [21:29<00:00, 25.80s/it]\n","\n","Binary Accuracy: 0.57  |  Mean Square Error: 21.37\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.4890500489549413, 'rouge2': 0.1773401744243524, 'rougeL': 0.2927300325119179}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.4451979451538087, 'rouge2': 0.1488862538435474, 'rougeL': 0.28203584327279174}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.4821893020794219, 'rouge2': 0.1515979029020472, 'rougeL': 0.2407330299032574}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.3059, 'grad_norm': 0.166546031832695, 'learning_rate': 5e-05, 'epoch': 3.31}\n","{'loss': 0.3053, 'grad_norm': 0.16894438862800598, 'learning_rate': 5e-05, 'epoch': 3.57}\n"," 72% 140/195 [2:06:59<11:38, 12.71s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.21it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.76it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.76it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.71it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.59it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.69it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.83it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.90it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.82it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.90it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.76it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.30it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.22it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.34it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.73it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.76it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.65it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.50it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.53it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.36it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:13<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.41it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.81it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.90it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.96it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.05it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.06it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.85it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.83it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.57it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.19it/s]\u001b[A\n","{'eval_loss': 0.3188345432281494, 'eval_runtime': 21.001, 'eval_samples_per_second': 5.047, 'eval_steps_per_second': 2.524, 'epoch': 3.57}\n","\n"," 72% 140/195 [2:07:20<11:38, 12.71s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:25<20:48, 25.47s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:53<21:25, 26.77s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:17<20:00, 25.54s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:38<18:18, 23.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [02:07<19:21, 25.82s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:31<18:27, 25.18s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [03:00<18:54, 26.38s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:22<17:28, 24.95s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:45<16:36, 24.32s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [04:14<17:15, 25.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:39<16:36, 25.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [05:04<16:01, 25.29s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:28<15:26, 25.04s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:54<15:08, 25.25s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [06:29<16:28, 28.24s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:55<15:38, 27.60s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [07:17<14:17, 25.97s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:45<14:06, 26.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [08:13<13:55, 26.96s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [08:25<11:15, 22.53s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [08:57<12:09, 25.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [09:20<11:24, 24.46s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [09:47<11:24, 25.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [10:09<10:34, 24.41s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [10:32<09:57, 23.90s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [11:03<10:23, 25.98s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [11:23<09:16, 24.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [11:49<09:03, 24.71s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [12:18<09:09, 26.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [12:41<08:22, 25.14s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [13:14<08:40, 27.39s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [13:33<07:32, 25.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [13:56<06:54, 24.39s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [14:24<06:45, 25.33s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [14:53<06:38, 26.54s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [15:22<06:20, 27.18s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [15:42<05:27, 25.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [16:11<05:15, 26.28s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [16:36<04:45, 25.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [17:05<04:28, 26.80s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [17:28<03:51, 25.71s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [17:58<03:35, 26.95s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [18:21<02:59, 25.64s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [18:41<02:24, 24.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [19:12<02:10, 26.12s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [19:37<01:43, 25.90s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [19:58<01:12, 24.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [20:20<00:47, 23.58s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [20:40<00:22, 22.57s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [21:05<00:00, 25.30s/it]\n","\n","Binary Accuracy: 0.41  |  Mean Square Error: 30.59\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.4979171091797699, 'rouge2': 0.1883758892118253, 'rougeL': 0.29957578951708813}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.43494689372069495, 'rouge2': 0.13444266214369374, 'rougeL': 0.26652643126510295}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.4641331980487564, 'rouge2': 0.1370356454621488, 'rougeL': 0.23267064225697104}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.3078, 'grad_norm': 0.2028413563966751, 'learning_rate': 5e-05, 'epoch': 3.82}\n","{'loss': 0.3023, 'grad_norm': 0.17937177419662476, 'learning_rate': 5e-05, 'epoch': 4.08}\n"," 82% 160/195 [2:32:51<07:47, 13.34s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.21it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.76it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.76it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.71it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.62it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.59it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.72it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.70it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.83it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.90it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.82it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.91it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.76it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.30it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.22it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.34it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.55it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.73it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.76it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.65it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.53it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.36it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:13<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.81it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.78it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.75it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.90it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.96it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.05it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.11it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.06it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.90it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.85it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.83it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.57it/s]\u001b[A\n","100% 53/53 [00:20<00:00,  2.09it/s]\u001b[A\n","{'eval_loss': 0.3156338930130005, 'eval_runtime': 21.1024, 'eval_samples_per_second': 5.023, 'eval_steps_per_second': 2.512, 'epoch': 4.08}\n","\n"," 82% 160/195 [2:33:13<07:47, 13.34s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:26<21:16, 26.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:58<23:56, 29.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:24<22:08, 28.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:45<19:13, 25.07s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [02:05<17:31, 23.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:24<16:02, 21.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [02:51<16:58, 23.69s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:15<16:36, 23.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:36<15:32, 22.74s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [04:04<16:14, 24.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:32<16:41, 25.67s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [04:53<15:19, 24.19s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:22<15:49, 25.65s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:54<16:25, 27.39s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [06:27<16:58, 29.10s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:52<15:47, 27.88s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [07:13<14:18, 26.00s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:35<13:07, 24.59s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [08:03<13:14, 25.64s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [08:25<12:15, 24.53s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [09:00<13:27, 27.84s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [09:21<12:00, 25.74s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [09:48<11:48, 26.22s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [10:17<11:37, 26.83s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [10:37<10:23, 24.93s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [11:04<10:15, 25.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [11:29<09:39, 25.21s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [12:01<09:59, 27.23s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [12:39<10:43, 30.66s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [13:06<09:51, 29.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [13:33<09:03, 28.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [13:52<07:44, 25.82s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [14:18<07:20, 25.91s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [14:48<07:14, 27.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [15:15<06:44, 26.96s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [15:45<06:30, 27.87s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [15:59<05:09, 23.79s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [16:27<05:01, 25.13s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [16:52<04:35, 25.04s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [17:13<03:57, 23.73s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [17:40<03:44, 24.95s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [18:16<03:44, 28.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [18:42<03:13, 27.64s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [19:05<02:36, 26.09s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [19:40<02:23, 28.66s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [20:07<01:52, 28.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [20:36<01:25, 28.62s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [21:00<00:54, 27.08s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [21:31<00:28, 28.22s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [21:54<00:00, 26.29s/it]\n","\n","Binary Accuracy: 0.54  |  Mean Square Error: 16.29\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.5080848211484772, 'rouge2': 0.19487039066531117, 'rougeL': 0.31814966304858067}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.46455219983832974, 'rouge2': 0.15551448311682578, 'rougeL': 0.2840730890275682}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.47214553602038123, 'rouge2': 0.14468629593723306, 'rougeL': 0.2376821603285981}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.2903, 'grad_norm': 0.18641026318073273, 'learning_rate': 5e-05, 'epoch': 4.33}\n","{'loss': 0.2917, 'grad_norm': 0.17536139488220215, 'learning_rate': 5e-05, 'epoch': 4.59}\n"," 92% 180/195 [2:59:35<03:07, 12.49s/it]\n","  0% 0/53 [00:00<?, ?it/s]\u001b[A\n","  4% 2/53 [00:00<00:12,  4.21it/s]\u001b[A\n","  6% 3/53 [00:00<00:16,  3.06it/s]\u001b[A\n","  8% 4/53 [00:01<00:17,  2.76it/s]\u001b[A\n","  9% 5/53 [00:01<00:17,  2.76it/s]\u001b[A\n"," 11% 6/53 [00:02<00:17,  2.71it/s]\u001b[A\n"," 13% 7/53 [00:02<00:17,  2.61it/s]\u001b[A\n"," 15% 8/53 [00:02<00:17,  2.59it/s]\u001b[A\n"," 17% 9/53 [00:03<00:16,  2.71it/s]\u001b[A\n"," 19% 10/53 [00:03<00:15,  2.75it/s]\u001b[A\n"," 21% 11/53 [00:03<00:15,  2.64it/s]\u001b[A\n"," 23% 12/53 [00:04<00:15,  2.69it/s]\u001b[A\n"," 25% 13/53 [00:04<00:14,  2.83it/s]\u001b[A\n"," 26% 14/53 [00:04<00:13,  2.90it/s]\u001b[A\n"," 28% 15/53 [00:05<00:13,  2.82it/s]\u001b[A\n"," 30% 16/53 [00:05<00:13,  2.83it/s]\u001b[A\n"," 32% 17/53 [00:06<00:12,  2.90it/s]\u001b[A\n"," 34% 18/53 [00:06<00:12,  2.75it/s]\u001b[A\n"," 36% 19/53 [00:06<00:13,  2.55it/s]\u001b[A\n"," 38% 20/53 [00:07<00:13,  2.37it/s]\u001b[A\n"," 40% 21/53 [00:07<00:13,  2.30it/s]\u001b[A\n"," 42% 22/53 [00:08<00:13,  2.22it/s]\u001b[A\n"," 43% 23/53 [00:08<00:12,  2.35it/s]\u001b[A\n"," 45% 24/53 [00:09<00:11,  2.56it/s]\u001b[A\n"," 47% 25/53 [00:09<00:10,  2.73it/s]\u001b[A\n"," 49% 26/53 [00:09<00:09,  2.77it/s]\u001b[A\n"," 51% 27/53 [00:10<00:09,  2.66it/s]\u001b[A\n"," 53% 28/53 [00:10<00:09,  2.53it/s]\u001b[A\n"," 55% 29/53 [00:10<00:09,  2.51it/s]\u001b[A\n"," 57% 30/53 [00:11<00:08,  2.61it/s]\u001b[A\n"," 58% 31/53 [00:11<00:08,  2.53it/s]\u001b[A\n"," 60% 32/53 [00:12<00:08,  2.36it/s]\u001b[A\n"," 62% 33/53 [00:12<00:08,  2.39it/s]\u001b[A\n"," 64% 34/53 [00:13<00:07,  2.39it/s]\u001b[A\n"," 66% 35/53 [00:13<00:07,  2.42it/s]\u001b[A\n"," 68% 36/53 [00:13<00:06,  2.63it/s]\u001b[A\n"," 70% 37/53 [00:14<00:05,  2.77it/s]\u001b[A\n"," 72% 38/53 [00:14<00:05,  2.81it/s]\u001b[A\n"," 74% 39/53 [00:14<00:05,  2.77it/s]\u001b[A\n"," 75% 40/53 [00:15<00:04,  2.76it/s]\u001b[A\n"," 77% 41/53 [00:15<00:04,  2.80it/s]\u001b[A\n"," 79% 42/53 [00:15<00:03,  2.90it/s]\u001b[A\n"," 81% 43/53 [00:16<00:03,  2.96it/s]\u001b[A\n"," 83% 44/53 [00:16<00:02,  3.05it/s]\u001b[A\n"," 85% 45/53 [00:16<00:02,  3.12it/s]\u001b[A\n"," 87% 46/53 [00:17<00:02,  3.06it/s]\u001b[A\n"," 89% 47/53 [00:17<00:02,  2.96it/s]\u001b[A\n"," 91% 48/53 [00:17<00:01,  2.91it/s]\u001b[A\n"," 92% 49/53 [00:18<00:01,  2.85it/s]\u001b[A\n"," 94% 50/53 [00:18<00:01,  2.84it/s]\u001b[A\n"," 96% 51/53 [00:18<00:00,  2.81it/s]\u001b[A\n"," 98% 52/53 [00:19<00:00,  2.58it/s]\u001b[A\n","100% 53/53 [00:19<00:00,  2.22it/s]\u001b[A\n","{'eval_loss': 0.31385818123817444, 'eval_runtime': 20.9531, 'eval_samples_per_second': 5.059, 'eval_steps_per_second': 2.529, 'epoch': 4.59}\n","\n"," 92% 180/195 [2:59:56<03:07, 12.49s/it]\n","\n","  0% 0/50 [00:00<?, ?it/s]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  2% 1/50 [00:29<23:49, 29.18s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  4% 2/50 [00:59<24:05, 30.12s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  6% 3/50 [01:24<21:41, 27.70s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","  8% 4/50 [01:46<19:26, 25.36s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 10% 5/50 [02:14<19:36, 26.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 12% 6/50 [02:35<18:04, 24.65s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 14% 7/50 [03:01<17:50, 24.89s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 16% 8/50 [03:22<16:34, 23.68s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 18% 9/50 [03:44<15:49, 23.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 20% 10/50 [04:07<15:25, 23.15s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 22% 11/50 [04:29<14:46, 22.72s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 24% 12/50 [04:54<14:50, 23.44s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 26% 13/50 [05:19<14:51, 24.10s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 28% 14/50 [05:49<15:23, 25.65s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 30% 15/50 [06:25<16:53, 28.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 32% 16/50 [06:51<15:47, 27.86s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 34% 17/50 [07:12<14:18, 26.02s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 36% 18/50 [07:40<14:11, 26.62s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 38% 19/50 [08:08<13:58, 27.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 40% 20/50 [08:23<11:38, 23.29s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 42% 21/50 [08:55<12:31, 25.92s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 44% 22/50 [09:21<12:09, 26.06s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 46% 23/50 [09:52<12:22, 27.51s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 48% 24/50 [10:18<11:42, 27.01s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 50% 25/50 [10:39<10:29, 25.20s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 52% 26/50 [11:10<10:49, 27.06s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 54% 27/50 [11:42<10:53, 28.40s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 56% 28/50 [12:07<10:05, 27.51s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 58% 29/50 [12:36<09:47, 27.98s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 60% 30/50 [12:58<08:42, 26.11s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 62% 31/50 [13:27<08:33, 27.05s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 64% 32/50 [13:53<07:59, 26.63s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 66% 33/50 [14:13<06:56, 24.47s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 68% 34/50 [14:42<06:56, 26.01s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 70% 35/50 [15:10<06:39, 26.64s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 72% 36/50 [15:47<06:53, 29.56s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 74% 37/50 [16:13<06:11, 28.57s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 76% 38/50 [16:40<05:39, 28.27s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 78% 39/50 [17:04<04:54, 26.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 80% 40/50 [17:31<04:27, 26.77s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 82% 41/50 [17:55<03:55, 26.11s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 84% 42/50 [18:30<03:50, 28.81s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 86% 43/50 [18:58<03:19, 28.47s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 88% 44/50 [19:25<02:48, 28.09s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 90% 45/50 [19:56<02:24, 28.94s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 92% 46/50 [20:21<01:50, 27.60s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 94% 47/50 [20:43<01:18, 26.03s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 96% 48/50 [21:04<00:49, 24.62s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n"," 98% 49/50 [21:32<00:25, 25.61s/it]\u001b[A\u001b[ASetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n","\n","\n","100% 50/50 [22:01<00:00, 26.43s/it]\n","\n","Binary Accuracy: 0.55  |  Mean Square Error: 24.73\n","\n","Rouge Score of Positive Developments: {'rouge1': 0.49779839682943144, 'rouge2': 0.18736558585361682, 'rougeL': 0.31137081573002817}\n","\n","Rouge Score of Potential Concerns: {'rouge1': 0.4390760748436198, 'rouge2': 0.14459793232400112, 'rougeL': 0.266724061054437}\n","\n","Rouge Score of Summary Analysis: {'rouge1': 0.46203814488900574, 'rouge2': 0.1381240782207603, 'rougeL': 0.23691466373407405}\n","\n","                                   \u001b[A/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py:825: UserWarning: cuDNN SDPA backward got grad_output.strides() != output.strides(), attempting to materialize a grad_output with matching strides... (Triggered internally at ../aten/src/ATen/native/cudnn/MHA.cpp:674.)\n","  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass\n","{'loss': 0.2948, 'grad_norm': 0.19432808458805084, 'learning_rate': 5e-05, 'epoch': 4.84}\n","{'train_runtime': 12331.7573, 'train_samples_per_second': 0.254, 'train_steps_per_second': 0.016, 'train_loss': 1.6720150177295392, 'epoch': 4.97}\n","100% 195/195 [3:25:30<00:00, 63.23s/it]\n","\u001b[1;34mwandb\u001b[0m: 🚀 View run \u001b[33mllama3-8b-a100-5e-5lr\u001b[0m at: \u001b[34mhttps://wandb.ai/yl5440-columbia-university/Benchmark with Llama-3-8B/runs/1n9fl409\u001b[0m\n","\u001b[1;34mwandb\u001b[0m: Find logs at: \u001b[1;35mwandb/run-20241105_203136-1n9fl409/logs\u001b[0m\n","[rank0]:[W1105 23:57:13.969887699 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present,  but this warning has only been added since PyTorch 2.4 (function operator())\n","[2024-11-05 23:57:17,490] [INFO] [launch.py:351:main] Process 3936 exits successfully.\n"]}],"source":["!bash train.sh"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"iKwRTkc-LNji"},"outputs":[],"source":[]}],"metadata":{"colab":{"machine_shape":"hm","provenance":[],"gpuType":"A100","toc_visible":true,"authorship_tag":"ABX9TyOkW5IxBD6LtGbLK18bqihs"},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"ec45cd4da15f4b998a1e185764f5b16d":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e57edd2bb9b74676bfdcf66b12c8cc5c","IPY_MODEL_f3f7a941eff946d88f082a482fec1f11","IPY_MODEL_857ccdeccef04eddb1a833c0cfb8535f"],"layout":"IPY_MODEL_74e35720b9074ccdba2e495ffd0895dd"}},"e57edd2bb9b74676bfdcf66b12c8cc5c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3927b957970548da96f830871fffdbf9","placeholder":"​","style":"IPY_MODEL_b666036aa98f42d48a97b224da3fd77c","value":"Map: 100%"}},"f3f7a941eff946d88f082a482fec1f11":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_41032241335949a18ed6d8b7e167b581","max":784,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e5e886e61cf549539175829a2274fdb4","value":784}},"857ccdeccef04eddb1a833c0cfb8535f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f7fcd72791de411983343b0e69dbddeb","placeholder":"​","style":"IPY_MODEL_87f8a4e0e5f14751a7f19273118aaafe","value":" 784/784 [00:00&lt;00:00, 5937.96 examples/s]"}},"74e35720b9074ccdba2e495ffd0895dd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3927b957970548da96f830871fffdbf9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b666036aa98f42d48a97b224da3fd77c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"41032241335949a18ed6d8b7e167b581":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e5e886e61cf549539175829a2274fdb4":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f7fcd72791de411983343b0e69dbddeb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"87f8a4e0e5f14751a7f19273118aaafe":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0}